In [1]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
# Mode 2 is documented by IPython as "reload all modules before executing code".
%autoreload 2

In [22]:
cd ../output

/Users/jeff.doyle/work/bca4abm/tutorials/output


In [3]:
import orca
import pandas as pd
import numpy as np
import os

In [4]:
# Species lookup table, indexed by a one-letter species code.
# age_rate is the amount the aging step adds to a pet's age on each iteration.
df_species = pd.DataFrame(
    data=dict(
        species_name=['dog', 'cat'],
        age_rate=[7, 5],
    ),
    index=['D', 'C'],
)

df_species

Unnamed: 0,age_rate,species_name
D,7,dog
C,5,cat


In [5]:
# Pets table: one row per pet, keyed by a pet id, with every age starting at 0.
# species_id holds the index value of the matching row in the species table.
df_pet = pd.DataFrame(
    data=dict(
        pet_name=['wilkie', 'lassie', 'leo', 'felix', 'rex'],
        age=[0] * 5,
        species_id=['D', 'D', 'C', 'C', 'D'],
    ),
    index=['p0', 'p1', 'p2', 'p3', 'p4'],
)

df_pet

Unnamed: 0,age,pet_name,species_id
p0,0,wilkie,D
p1,0,lassie,D
p2,0,leo,C
p3,0,felix,C
p4,0,rex,D


In [6]:
# Register both DataFrames with orca under the names later cells refer to.
orca.add_table('pets', df_pet)
orca.add_table('species', df_species)

# Declare the join: the species index is cast onto each pet's species_id column.
orca.broadcast(
    cast='species',
    onto='pets',
    cast_index=True,
    onto_on='species_id',
)

# Merge species attributes onto the pets table; the result is this cell's output.
orca.merge_tables(
    'pets',
    tables=['pets', 'species'],
    columns=['pet_name', 'age', 'species_name', 'age_rate'],
)

Unnamed: 0,age,pet_name,species_id,age_rate,species_name
p0,0,wilkie,D,7,dog
p1,0,lassie,D,7,dog
p4,0,rex,D,7,dog
p2,0,leo,C,5,cat
p3,0,felix,C,5,cat


In [7]:
# The pets/species merge is needed in more than one place, so register it once
# as an orca table and refer to it by name afterwards.
@orca.table()
def pets_merged(pets, species):
    """Return the pets table merged with the attributes of each pet's species.

    pets, species: orca tables injected by name. The merge targets the pets
    table (via its registered name) and requests the pet name, age, species
    name and age rate columns.
    """
    wanted_columns = ['pet_name', 'age', 'species_name', 'age_rate']
    return orca.merge_tables(
        pets.name,
        tables=[pets, species],
        columns=wanted_columns,
    )

In [8]:
# Look up the registered 'pets_merged' table by name and show it as a DataFrame.
(
    orca.get_table('pets_merged')
    .to_frame()
)

Unnamed: 0,age,pet_name,species_id,age_rate,species_name
p0,0,wilkie,D,7,dog
p1,0,lassie,D,7,dog
p4,0,rex,D,7,dog
p2,0,leo,C,5,cat
p3,0,felix,C,5,cat


In [9]:
# Step 1: age every pet by the rate that applies to its species.
@orca.step()
def age_simulate(pets):
    """Add each pet's species age_rate to its age and write the result back.

    pets: the orca 'pets' table, injected by name; its 'age' column is
    overwritten in place through update_col_from_series.
    """
    current_age = pets.age
    # NOTE(review): pets_merged is fetched with orca.get_table rather than being
    # injected as a step argument -- presumably so it is not written to the
    # data_out store along with the injected tables; confirm before changing.
    rate_per_pet = orca.get_table('pets_merged').age_rate
    pets.update_col_from_series('age', current_age + rate_per_pet)

# create a second step to illustrate how pipelining works
@orca.step()
def summarize(pets, iter_var):
    """Print the current iteration value followed by each pet's name and age.

    pets: the orca 'pets' table, injected by name and read via to_frame().
    iter_var: the value of the current iteration, injected by orca.
    """
    # Single-argument print(...) calls produce the same text under Python 2's
    # print statement and Python 3's print function, so this cell runs on both.
    print('*** i = {} ***'.format(iter_var))
    print(pets.to_frame()[['pet_name', 'age']])

In [11]:
# Now let's run the orca pipeline: both steps execute once per iteration value.

# data_out (optional) names the pandas HDF data store to which all tables
# injected into any step will be saved.
hdf_output_filename = 'run_orca.h5'
orca.run(
    ['age_simulate', 'summarize'],
    iter_vars=range(2010, 2015),
    data_out=hdf_output_filename,
)

Running iteration 1 with iteration value 2010
Running step 'age_simulate'
Time to execute step 'age_simulate': 0.00 s
Running step 'summarize'
*** i = 2010 ***
   pet_name  age
p0   wilkie    7
p1   lassie    7
p2      leo    5
p3    felix    5
p4      rex    7
Time to execute step 'summarize': 0.00 s
Total time to execute iteration 1 with iteration value 2010: 0.01 s
Running iteration 2 with iteration value 2011
Running step 'age_simulate'
Time to execute step 'age_simulate': 0.00 s
Running step 'summarize'
*** i = 2011 ***
   pet_name  age
p0   wilkie   14
p1   lassie   14
p2      leo   10
p3    felix   10
p4      rex   14
Time to execute step 'summarize': 0.00 s
Total time to execute iteration 2 with iteration value 2011: 0.01 s
Running iteration 3 with iteration value 2012
Running step 'age_simulate'
Time to execute step 'age_simulate': 0.01 s
Running step 'summarize'
*** i = 2012 ***
   pet_name  age
p0   wilkie   21
p1   lassie   21
p2      leo   15
p3    felix   15
p4      rex  


Time to execute step 'summarize': 0.00 s
Total time to execute iteration 4 with iteration value 2013: 0.01 s
Running iteration 5 with iteration value 2014
Running step 'age_simulate'
Time to execute step 'age_simulate': 0.00 s
Running step 'summarize'
*** i = 2014 ***
   pet_name  age
p0   wilkie   35
p1   lassie   35
p2      leo   25
p3    felix   25
p4      rex   35
Time to execute step 'summarize': 0.00 s
Total time to execute iteration 5 with iteration value 2014: 0.01 s


In [12]:
# Let's inspect the output written by the pipeline run.
# Open the store read-only: this cell only looks at the results, and mode='r'
# raises if the file is missing instead of silently creating an empty store
# (the default mode is append). Call store.close() once finished.
store = pd.HDFStore(hdf_output_filename, mode='r')
store

<class 'pandas.io.pytables.HDFStore'>
File path: run_orca.h5
/2011/pets            frame        (shape->[5,3])
/2012/pets            frame        (shape->[5,3])
/2013/pets            frame        (shape->[5,3])
/2014/pets            frame        (shape->[5,3])
/base/pets            frame        (shape->[5,3])

In [13]:
# Retrieve the pets table saved under the 2014 iteration key.
store.get('/2014/pets')

Unnamed: 0,age,pet_name,species_id
p0,35,wilkie,D
p1,35,lassie,D
p2,25,leo,C
p3,25,felix,C
p4,35,rex,D


In [None]:
# Release the HDF file handle opened for inspection above.
store.close()