# Currency Estimation Network Usage Example

In [1]:
from src.abstract_node import *
from src.shelflife.conditional_shelflife_node import *
from src.dependency.aggregator_node import *
from src.changepoint.cusum_node import *
from src.currency_network import *

In [2]:
# Toy panel data: two fictional individuals (ids 1 and 2), one row per year
# from 2000 through 2007 (replication of the paper example).
# NOTE(review): for id 1 the row with age 34 carries age_bin '35-39' --
# presumably taken verbatim from the paper; confirm before "fixing" it,
# since the results shown below depend on these exact values.
sample_data = {
    "id": [1] * 8 + [2] * 8,
    "year": list(range(2000, 2008)) * 2,
    "gender": ['f'] * 8 + ['m'] * 8,
    "age": list(range(30, 38)) + list(range(25, 33)),
    "age_bin": ['30-34'] * 4 + ['35-39'] * 4 + ['25-29'] * 5 + ['30-34'] * 3,
    "children": [0] * 4 + [1] * 3 + [2] + [0] * 8,
    "marital_status": ['s'] * 16,
}
dataframe = pd.DataFrame(sample_data)
dataframe

Unnamed: 0,id,year,gender,age,age_bin,children,marital_status
0,1,2000,f,30,30-34,0,s
1,1,2001,f,31,30-34,0,s
2,1,2002,f,32,30-34,0,s
3,1,2003,f,33,30-34,0,s
4,1,2004,f,34,35-39,1,s
5,1,2005,f,35,35-39,1,s
6,1,2006,f,36,35-39,1,s
7,1,2007,f,37,35-39,2,s
8,2,2000,m,25,25-29,0,s
9,2,2001,m,26,25-29,0,s


In [None]:
# Sample marriage rates for the Conditional Shelf Life Node, keyed by
# (age_bin, gender). Built from one rate table per gender; the resulting dict
# lists all 'm' entries first, then all 'f' entries, each in ascending age-bin order.
marriage_rates_map = {
    (age_bin, gender): rate
    for gender, rates_by_age_bin in (
        ('m', {
            '0-24': 0.01,
            '25-29': 0.05,
            '30-34': 0.1,
            '35-39': 0.05,
            '40-44': 0.03,
            '45-49': 0.02,
            '50-59': 0.01,
            '60-69': 0.001,
            '70-100': 0.001,
        }),
        ('f', {
            '0-24': 0.02,
            '25-29': 0.06,
            '30-34': 0.09,
            '35-39': 0.07,
            '40-44': 0.02,
            '45-49': 0.01,
            '50-59': 0.01,
            '60-69': 0.002,
            '70-100': 0.002,
        }),
    )
    for age_bin, rate in rates_by_age_bin.items()
}

In [None]:
# Create every node first, then assemble them into the estimation network.

# --- Nodes ---
# Data nodes: each one is given a prior over the values of its attribute.
age_bin_node = DataNode(
    attribute='age_bin',
    prior={
        '0-24': 0.05,
        '25-29': 0.2,
        '30-34': 0.2,
        '35-39': 0.15,
        '40-44': 0.1,
        '45-49': 0.05,
        '50-59': 0.1,
        '60-69': 0.1,
        '70-100': 0.05,
    },
)
gender_node = DataNode(attribute='gender', prior={'f': 0.5, 'm': 0.5})

# Shelf-life node for marital status, driven by the marriage rates map.
# NOTE(review): the map is keyed by (age_bin, gender) tuples while `parents`
# is an unordered set -- presumably from_map matches parents by attribute
# name rather than by position; confirm against the library.
relationship_node = ConditionalShelfLife.from_map(
    attribute='marital_status',
    map=marriage_rates_map,
    parents={gender_node, age_bin_node},
)

# CUSUM change-point node watching the 'children' attribute.
child_sum_node = CUSUMPoisson(
    attribute='children',
    a=1,
    b=1,
    ground_lambda=0.01,
    alt_lambda=1,
)

# Aggregator combining the two nodes above with the AVERAGE quantifier.
agg_relationship_node = Aggregator(
    attribute='marital_status',
    parents={relationship_node, child_sum_node},
    aggregator=BasicQuantifier.AVERAGE,
)

# --- Network ---
singles_network = Network(
    nodes=[
        age_bin_node,
        gender_node,
        relationship_node,
        child_sum_node,
        agg_relationship_node,
    ]
)

In [None]:
# Overview of the created network nodes: list_nodes() reports every node in
# the network together with the attributes of its parents.
singles_network.list_nodes()

Nodes in network:
0: CUSUM Poisson node for attribute 'children' (with parents: [])
1: Data node for attribute 'gender' (with parents: [])
2: Data node for attribute 'age_bin' (with parents: [])
3: Conditional Geometric Shelf Life Node (with parents: ['age_bin', 'gender'])
4: Quantified Aggregator Node (with parents: ['children', 'marital_status'])


In [None]:
# To estimate the currency for each individual in the dataframe, loop over the
# unique IDs and store each individual's result in a dictionary keyed by ID.
currency_per_id = {}
# `person_id` (rather than `id`) keeps the builtin id() usable after this cell.
for person_id in dataframe['id'].unique():
    # clear() is called before every individual -- presumably so that no state
    # from the previous individual's estimation carries over.
    singles_network.clear()
    # Select this individual's rows and only the columns passed to the network.
    person_data = dataframe.loc[
        dataframe['id'] == person_id,
        ['year', 'age_bin', 'gender', 'children', 'marital_status'],
    ]
    result = singles_network.estimate(data=person_data, time_column='year')
    # Store under a plain Python int so the dictionary keys are not numpy scalars.
    currency_per_id[int(person_id)] = result

# the output dictionary
currency_per_id

{1: {'children_currency': {np.int64(2000): 1.0,
   np.int64(2001): 1.0,
   np.int64(2002): 1.0,
   np.int64(2003): 1.0,
   np.int64(2004): np.float64(0.0014475002098330858),
   np.int64(2005): np.float64(1.049145968656795e-06),
   np.int64(2006): np.float64(7.598698337218934e-10),
   np.int64(2007): np.float64(0.0)},
  'marital_status_currency': {np.int64(2000): 1.0,
   np.int64(2001): 0.95905,
   np.int64(2002): 0.9217854999999999,
   np.int64(2003): 0.8878748049999999,
   np.int64(2004): np.float64(0.3624167347549164),
   np.int64(2005): np.float64(0.33734441629748424),
   np.int64(2006): np.float64(0.3146992356837198),
   np.int64(2007): np.float64(0.29363970483252)}},
 2: {'children_currency': {np.int64(2000): 1.0,
   np.int64(2001): 1.0,
   np.int64(2002): 1.0,
   np.int64(2003): 1.0,
   np.int64(2004): 1.0,
   np.int64(2005): 1.0,
   np.int64(2006): 1.0,
   np.int64(2007): 1.0},
  'marital_status_currency': {np.int64(2000): 1.0,
   np.int64(2001): 0.9762500000000001,
   np.int64(

In [None]:
# For visualization, attach each individual's estimated marital-status currency
# for 2007 to that individual's 2007 row of the original dataframe.
generated_data = dataframe[dataframe['year'] == 2007].copy()
# One lookup per row via map (instead of a loop variable named `id`, which
# shadows the builtin, and a fresh boolean mask per individual). An ID missing
# from currency_per_id still raises KeyError, as before.
generated_data['marital_status_currency_2007'] = generated_data['id'].map(
    lambda person_id: currency_per_id[person_id]['marital_status_currency'][2007]
)

generated_data

Unnamed: 0,id,year,gender,age,age_bin,children,marital_status,marital_status_currency_2007
7,1,2007,f,37,35-39,2,s,0.29364
15,2,2007,m,32,30-34,0,s,0.831879
