In [None]:
import numpy as np
import pandas as pd
import itertools


In [None]:
class Environment:
    
    def __init__(self,location_attributes,location_profiles,random_state=None):
        """
            Object that defines the environment of a simulation.
            
            The location attributes are fixed for each location and are used for high-level calibration
                of the approximate distributions of key features.
                
            The location profiles are used to determine characteristics of individuals generated
                in population subgroups of each location. Only the profile rows whose location_name
                matches a location are applied to that location.
                
            :param location_attributes: pandas.DataFrame with the following fields:
                location_name : (string) location identifier.
                density : (string) rural or urban.
                population : (int) Number of residents.
                employment_rate : (float) Rate between 0 and 1 of employed (vs. unemployed) residents.
                wealth_rate : (float) Rate between 0 and 1 of high-income (vs. low-income) residents.
                
            :param location_profiles: pandas.DataFrame with the following fields:
                location_name : (string) location identifier.
                wealth_status : (int or boolean) 0 represents low income and 1 represents high income.
                employment_status : (int or boolean) 0 represents unemployed and 1 represents employed.
                phoneownership_rate : (float) Probability between 0 and 1 that a resident owns a cellphone.
                worktravel_baseline : (float) Distance in miles of baseline for work-related travel.
                socialtravel_baseline : (float) Distance in miles of baseline for social-related travel.
                grocerytravel_baseline : (float) Distance in miles of baseline for grocery-related travel.
                
            :param random_state: integer representing the random state, or None.
                The global NumPy random state is seeded with this value while the environment is
                built and is restored to its previous state afterwards.
                
            :raises KeyError: if a wealth_status or employment_status value is not 0/1 (or False/True).
                
            :return: Environment object.
        """
        
        # Define lookup dictionaries:
        # (False/True hash and compare equal to 0/1, so boolean statuses resolve through the same keys.)
        wealth_labels = {
            0 : "lowincome",
            1 : "highincome",
        }
        employment_labels = {
            0 : "unemployed",
            1 : "employed",
        }
        # Per-subgroup metrics copied from each profile row into the detailed table:
        subgroup_metrics = [
            'phoneownership_rate',
            'worktravel_baseline',
            'socialtravel_baseline',
            'grocerytravel_baseline',
        ]
        
        # Define properties:
        self.location_attributes = location_attributes.copy()
        self.location_profiles = location_profiles.copy()
        self.location_names = []
        self.location_details = []
        self.people = []
        
        # Set random state, remembering the caller's state so that it can be put back afterwards:
        previous_rng_state = np.random.get_state()
        np.random.seed(random_state)
        try:
            # Assign properties:
            profile_location_names = self.location_profiles['location_name']
            for _,attribute_row in self.location_attributes.iterrows():
                location_name = attribute_row['location_name']
                self.location_names.append(location_name)
                details_row = attribute_row.copy()
                population = attribute_row['population']
                # Add population estimates:
                details_row['lowincome_rate'] = (1-details_row['wealth_rate'])
                details_row['highincome_rate'] = (details_row['wealth_rate'])
                details_row['unemployed_rate'] = (1-details_row['employment_rate'])
                details_row['employed_rate'] = (details_row['employment_rate'])
                details_row['lowincome_population'] = population*details_row['lowincome_rate']
                details_row['highincome_population'] = population*details_row['highincome_rate']
                details_row['unemployed_population'] = population*details_row['unemployed_rate']
                details_row['employed_population'] = population*details_row['employed_rate']
                # Only use the profiles that belong to this location; iterating over the whole table
                # would let the last location's profile overwrite the metrics of every other location.
                own_profiles = self.location_profiles[profile_location_names==location_name]
                for _,profile_row in own_profiles.iterrows():
                    # Build subgroup labels:
                    wealth_label = wealth_labels[profile_row['wealth_status']]
                    employment_label = employment_labels[profile_row['employment_status']]
                    subgroup_label = "{}_{}".format(wealth_label,employment_label)
                    # Add rates for this subgroup to detailed table
                    # (product of the wealth and employment rates of the location):
                    subgroup_rate = details_row[wealth_label+'_'+'rate']*details_row[employment_label+'_'+'rate']
                    details_row[subgroup_label+'_'+'rate'] = subgroup_rate
                    details_row[subgroup_label+'_'+'population'] = population*subgroup_rate
                    for metric in subgroup_metrics:
                        details_row[subgroup_label+'_'+metric] = profile_row[metric]
                    # Generate people according to specified distributions:
                    pass
                self.location_details.append(details_row)
        finally:
            # Restore the random state, even when construction fails part-way:
            np.random.set_state(previous_rng_state)
        
        self.location_details = pd.DataFrame(self.location_details)
        self.people = pd.DataFrame(self.people)
        
    def compute_statistics(self):
        """
            Placeholder for summary statistics of the environment (not implemented yet).
            
            :return: None.
        """
        
        pass

#class Location:
#    
#    def __init__(self,environment):
#        
#        # Store simulation context:
#        self.environment = environment
#        
#        # Simulate attributes:
#        self.assign_characteristics()
#        self.assign_people()
#        
#    def assign_characteristics(self):
#        
#        # Demographic characteristics:
#        self.name = None
#        self.density = None
#        self.population = None
#        self.employment_rate = None
#        self.poverty_rate = None
#        self.phoneownership_rate_highincome = None
#        self.phoneownership_rate_lowincome = None
#        
#    def assign_people(self):
#        
#        self.people = []

class Person:
    """
        A simulated individual living in an Environment.
        
        Every simulated attribute is created by the constructor and starts out as None.
    """
    
    def __init__(self,environment):
        """
            :param environment: simulation context, stored as-is on the instance.
        """
        
        # Keep a handle on the simulation context, then create all simulated attributes:
        self.environment = environment
        self.assign_demographics()
        self.assign_behavior()
        
    def assign_demographics(self):
        """
            Create the demographic attributes (all initialised to None):
                location : Location ID.
                poverty : Boolean.
                phones : 0, 1, 2.
                compliance : Boolean.
        """
        
        for demographic in ('location','poverty','phones','compliance'):
            setattr(self,demographic,None)

    def assign_behavior(self):
        """
            Create the movement attributes (all initialised to None).
            
            One attribute named "<activity>_movement_<aspect>" is created for each of the
            work, social and grocery activities and each of the following aspects:
                profile : Behavior profile.
                distance : Average weekly distance.
                change : Percent change (between -1.0 and +inf).
                captured : Percent of distance captured by phone1.
        """
        
        activities = ('work','social','grocery')
        aspects = ('profile','distance','change','captured')
        # Aspect-major order keeps the attributes grouped by aspect on the instance:
        for aspect in aspects:
            for activity in activities:
                setattr(self,"{}_movement_{}".format(activity,aspect),None)
        ## Percent of distance captured by phone2:
        #self.work_movement_captured2 = None
        #self.social_movement_captured2 = None
        #self.grocery_movement_captured2 = None

# Build the environment from the two location tables, with a fixed random state.
# NOTE(review): `location_attributes` and `location_profiles` are only assigned further down in
# this file; presumably those cells are meant to be executed before this one - confirm.
env = Environment(location_attributes, location_profiles, random_state=221)
print(env.location_names)

# Bare expression: shown as the cell output when run as the last statement of a notebook cell.
env.location_details


In [None]:
# Fixed attributes of every location, one row per location.
# Rates are fractions between 0 and 1; population is a head count.
location_attributes = pd.DataFrame(
    data=[
        ('loc1', 'rural', 3000, 0.7, 0.6),
        ('loc2', 'rural', 7000, 0.7, 0.7),
        ('loc3', 'rural', 4000, 0.6, 0.4),
        ('loc4', 'rural', 3000, 0.6, 0.5),
        ('loc5', 'urban', 1000, 0.6, 0.5),
        ('loc6', 'urban', 2000, 0.6, 0.5),
        ('loc7', 'urban', 3000, 0.8, 0.5),
        ('loc8', 'urban', 2000, 0.7, 0.5),
    ],
    columns=['location_name', 'density', 'population', 'employment_rate', 'wealth_rate'],
)
location_attributes


In [None]:
# Profiles of the sub-populations of each location:
# one row per (location, wealth_status, employment_status) combination.
location_profiles = pd.DataFrame(
    data=[
        ('loc1', 0, 0, 0.7,   0, 50, 10),
        ('loc1', 0, 1, 0.7, 100, 50, 10),
        ('loc1', 1, 0, 0.9,   0, 50, 10),
        ('loc1', 1, 1, 0.9, 100, 50, 10),
        ('loc2', 0, 0, 0.7,   0, 50, 10),
        ('loc2', 0, 1, 0.7, 100, 50, 10),
        ('loc2', 1, 0, 0.9,   0, 50, 10),
        ('loc2', 1, 1, 0.9, 100, 50, 10),
        ('loc3', 0, 0, 0.7,   0, 50, 10),
        ('loc3', 0, 1, 0.7, 100, 50, 10),
        ('loc3', 1, 0, 0.9,   0, 50, 10),
        ('loc3', 1, 1, 0.9, 100, 50, 10),
        ('loc4', 0, 0, 0.7,   0, 50, 10),
        ('loc4', 0, 1, 0.7, 100, 50, 10),
        ('loc4', 1, 0, 0.9,   0, 50, 10),
        ('loc4', 1, 1, 0.9, 100, 50, 10),
        ('loc5', 0, 0, 0.7,   0, 50,  5),
        ('loc5', 0, 1, 0.7, 100, 50,  5),
        ('loc5', 1, 0, 0.9,   0, 50,  5),
        ('loc5', 1, 1, 0.9, 100, 50,  5),
        ('loc6', 0, 0, 0.7,   0, 50,  5),
        ('loc6', 0, 1, 0.7, 100, 50,  5),
        ('loc6', 1, 0, 0.9,   0, 50,  5),
        ('loc6', 1, 1, 0.9, 100, 50,  5),
        ('loc7', 0, 0, 0.7,   0, 50,  5),
        ('loc7', 0, 1, 0.7, 100, 50,  5),
        ('loc7', 1, 0, 0.9,   0, 50,  5),
        ('loc7', 1, 1, 0.9, 100, 50,  5),
        ('loc8', 0, 0, 0.7,   0, 50,  5),
        ('loc8', 0, 1, 0.7, 100, 50,  5),
        ('loc8', 1, 0, 0.9,   0, 50,  5),
        ('loc8', 1, 1, 0.9, 100, 50,  5),
    ],
    columns=[
        'location_name',
        'wealth_status',
        'employment_status',
        'phoneownership_rate',
        'worktravel_baseline',
        'socialtravel_baseline',
        'grocerytravel_baseline',
    ],
)

# Check validity of subgroup profiles based on hardcoded rules:
def _subgroup_label(group_cols,group_vals):
    """Format paired column names and values as "col1=val1,col2=val2" (scalars are accepted too)."""
    group_cols = np.array([group_cols]).flatten()
    group_vals = np.array([group_vals]).flatten()
    return ",".join(
        "{}={}".format(group_col,group_val)
        for group_col,group_val in zip(group_cols,group_vals)
    )

def _verify_unique(subgroup_profiles,value_col,condition_cols):
    """Raise AssertionError unless value_col holds a single value (within each condition_cols group, if given)."""
    if (condition_cols is None) or (len(condition_cols)==0):
        # Unconditional:
        vals = set(subgroup_profiles[value_col])
        if len(vals)!=1:
            raise AssertionError("Found multiple values for column {} : {}".format(
                value_col,vals
            ))
    else:
        # Conditional:
        for g,grp in subgroup_profiles.groupby(condition_cols):
            vals = set(grp[value_col])
            if len(vals)!=1:
                raise AssertionError("Found multiple values for column {} conditional on {}: {}".format(
                    value_col,_subgroup_label(condition_cols,g),vals
                ))

# Check validity of subgroup profiles based on hardcoded rules:
def check_loc_profiles(location_profiles,subgroup_cols):
    """
        Validate the table of subgroup profiles.
        
        The checks are:
            - the table has no blank (null) values;
            - every location has a row for each combination of the values found (anywhere in
              the table) in the subgroup columns;
            - within each location, phoneownership_rate and socialtravel_baseline take one value
              per wealth_status, worktravel_baseline takes one value per employment_status, and
              grocerytravel_baseline takes a single value.
        
        Failures are raised explicitly rather than with assert statements, so that the checks
        still run when Python is started with -O.
        
        :param location_profiles: pandas.DataFrame with a location_name column, the subgroup
            columns and the four metric columns named above.
        :param subgroup_cols: list of the column names that define a subgroup.
        
        :raises AssertionError: if any of the checks fails.
        
        :return: None.
    """
    if pd.isnull(location_profiles).sum().sum()!=0:
        raise AssertionError("Location profile table has blank values.")
    # The expected combinations are derived from the whole table, so they are the same for
    # every location and only need to be computed once:
    possible_values = [sorted(set(location_profiles[col])) for col in subgroup_cols]
    possible_combos = list(itertools.product(*possible_values))
    for location_name,subgroup_profiles in location_profiles.groupby('location_name'):
        # Make sure that each combination is represented:
        actual_combos = subgroup_profiles[subgroup_cols].to_records(index=False)
        actual_combos = {tuple(actual_combo) for actual_combo in actual_combos}
        for possible_combo in possible_combos:
            if possible_combo not in actual_combos:
                raise AssertionError("Missing combination in {}: {}".format(
                    location_name,_subgroup_label(subgroup_cols,possible_combo)
                ))
        # Verify values:
        # Make sure that phoneownership_rate depends only on wealth_status (for this location):
        _verify_unique(subgroup_profiles,'phoneownership_rate',['wealth_status'])
        # Make sure that worktravel_baseline depends only on employment_status (for this location):
        _verify_unique(subgroup_profiles,'worktravel_baseline',['employment_status'])
        # Make sure that socialtravel_baseline depends only on wealth_status (for this location):
        _verify_unique(subgroup_profiles,'socialtravel_baseline',['wealth_status'])
        # Make sure that grocerytravel_baseline is unconditional (for this location):
        _verify_unique(subgroup_profiles,'grocerytravel_baseline',None)

# Validate the profile table, using wealth and employment status as the subgroup keys:
check_loc_profiles(location_profiles, subgroup_cols=['wealth_status','employment_status'])

# Bare expression: shown as the cell output when run as the last statement of a notebook cell.
location_profiles


In [None]:
def calc_global_rate(location_profiles,metric_col):
    """
        Population-weighted average of a metric column.
        
        :param location_profiles: pandas.DataFrame with a population column and the metric column.
        :param metric_col: name of the column to average.
        
        :return: sum(population * metric) / sum(population).
    """
    weights = location_profiles['population']
    weighted_values = weights*location_profiles[metric_col]
    return sum(weighted_values)/sum(weights)
# Print the population-weighted global value of each metric, one per line.
# NOTE(review): the location_profiles table built earlier in this file has no 'population',
# 'employment_rate', 'poverty_rate' or 'baseline_travel' column, so this loop looks like it
# targets an older or differently shaped table - confirm which DataFrame is intended here.
for metric_col in ['employment_rate','poverty_rate','baseline_travel','phoneownership_rate']:
    print("{: <20} : {}".format(metric_col,calc_global_rate(location_profiles,metric_col=metric_col)))


In [None]:
# Seed NumPy's global random number generator with a fixed value:
np.random.seed(221)
