In [4]:
# Standard Libraries
import numpy as np
import random
from collections import defaultdict

# ABM Framework (Mesa)
from mesa import Agent, Model
# FIX: Use the 'schedule' submodule, which is the correct path for your installation
from mesa.time import RandomActivation 
from mesa.datacollection import DataCollector


# --- GLOBAL MODEL PARAMETERS (Synthesized from Thesis Data) ---

# Contextual Parameters (from Appendix E & Chapter 3.2)
ANNUAL_SWM_BUDGET = 1_500_000 # ₱1,500,000
QUARTERLY_BUDGET = ANNUAL_SWM_BUDGET / 4 # ₱375,000
NUM_BARANGAYS = 7
NUM_EPISODES = 10000 # Number of training runs for the RL agent
STEPS_PER_EPISODE = 4 # 4 quarters per year (episode)

# Barangay Data (Simplified placeholder for 7 barangays)
# Keys are names; values are tuples (Total_Households, Local_SWM_Budget, Initial_Compliance_Estimate)
BARANGAY_DATA = {
    "LianganEast": (608, 30000, 0.65), # 60-70% from interview, used for calibration
    "Poblacion": (700, 40000, 0.10),
    "Esperanza": (550, 25000, 0.08),
    "Binuni": (400, 20000, 0.05), # Likely lower for inland/hard-to-reach areas
    "Demologan": (450, 22000, 0.07),
    "Mati": (500, 30000, 0.12),
    "Babalaya": (350, 18000, 0.09),
}

# Cost Parameter Estimation (Chapter 3.2.4)
# Placeholder costs derived from proxy estimation (e.g., regional minimum wage for enforcers)
COST_PER_ENFORCER_QUARTER = 30000 # Estimated salary/operations for one enforcer (FTE)
COST_PER_IEC_INTENSITY_UNIT = 10000 # Estimated cost of 1 week of radio spots/materials
PAYOUT_PER_COMPLIANT_HOUSEHOLD = 50 # ₱50 incentive (goods/rice)

# RL Reward Weights (Chapter 3.4.3) - Tuned during validation
ALPHA_COMPLIANCE = 10.0 # High weight for compliance (primary goal)
BETA_COST = 0.00001 # Small cost penalty to encourage efficiency
GAMMA_DEFICIT = 50.0 # Large penalty for exceeding the budget (fiscal discipline)

# TPB Parameter Defaults (wA, wSN, wPBC) (Chapter 3.2.1)
# These are synthesized from literature and calibrated to match 10% LGU estimate.
# Note: Income/Education weights are implicit in how UPolicy(I) and 'A' are calculated.
TPB_WEIGHTS = {
    'wA': 0.45,  # Attitude
    'wSN': 0.35, # Subjective Norms (High social influence in PH context)
    'wPBC': 0.20 # Perceived Behavioral Control
}

# Psychological Reactance Threshold (Chapter 3.2.1)
REACTANCE_THRESHOLD = 0.25 # If >25% of neighbors are fined, Attitude (A) is reduced.

In [5]:
class HouseholdAgent(Agent):
    """
    Represents a household whose decision to segregate is based on the 
    Theory of Planned Behavior (TPB) and LGU policy utility.
    """
    def __init__(self, unique_id, model, initial_income, initial_edu_level, barangay_id):
        super().__init__(unique_id, model)
        self.barangay_id = barangay_id
        self.income = initial_income # Modulates sensitivity to financial policies
        self.education = initial_edu_level
        self.is_compliant = 0 # 1 if compliant, 0 otherwise
        
        # Internal TPB Constructs (A, SN, PBC - initialized based on synthetic data)
        self.attitude = random.uniform(0.3, 0.7)
        self.subj_norm = random.uniform(0.2, 0.6)
        self.perc_b_control = random.uniform(0.4, 0.8)

    def calculate_utility(self, fine_magnitude, incentive_payout, neighborhood_compliance):
        """
        USegregate = (wA*A + wSN*SN + wPBC*PBC) + UPolicy(I) + epsilon
        (Chapter 2.3.1, Fig. 3.1)
        """
        wA, wSN, wPBC = TPB_WEIGHTS['wA'], TPB_WEIGHTS['wSN'], TPB_WEIGHTS['wPBC']
        
        # 1. Psychological Component (Internal Decision Drivers)
        Psych_Utility = (wA * self.attitude) + \
                        (wSN * self.subj_norm) + \
                        (wPBC * self.perc_b_control)
        
        # 2. Policy Component (Objective Cost/Benefit) - UPolicy(I)
        # Note: Sensitivity is modulated by income (I)
        # Low income (I) -> higher sensitivity to fine/incentive magnitude
        income_sensitivity = 1.0 - (self.income / self.model.MAX_INCOME_PROXY) 

        # Fine disutility (negative) - if not compliant, assume risk of fine
        # Fine disutility is higher for low-income households
        Fine_Disutility = -fine_magnitude * income_sensitivity * self.model.ENFORCEMENT_PERCEPTION_RATE 
        
        # Incentive utility (positive) - perceived reward for compliance
        Incentive_Utility = incentive_payout * (1.0 - income_sensitivity) # Less sensitive if high income
        
        Policy_Utility = Fine_Disutility + Incentive_Utility

        # 3. Stochastic Component (Epsilon)
        epsilon = random.gauss(0, 0.1) # Gaussian noise term

        USegregate = Psych_Utility + Policy_Utility + epsilon
        return USegregate

    def update_tpb_constructs(self, policy_action, neighborhood_fined_rate, avg_compliance):
        """
        Updates A, SN, and PBC based on LGU investment and social observation.
        (Chapter 3.3.1)
        """
        
        # 1. Update Attitude (A) based on IEC and Reactance
        # IEC Investment increases A (positive influence)
        self.attitude += 0.01 * policy_action['IEC'] * self.model.IEC_EFFECTIVENESS
        
        # Psychological Reactance (Negative influence)
        if neighborhood_fined_rate > REACTANCE_THRESHOLD:
            self.attitude -= 0.05 * (neighborhood_fined_rate - REACTANCE_THRESHOLD)
            
        # 2. Update Subjective Norm (SN) based on observed compliance
        # Observed compliance (social influence) increases SN
        self.subj_norm = (0.8 * self.subj_norm) + (0.2 * avg_compliance)
        
        # 3. PBC is updated by the LGU/Barangay's infrastructure provision (Simplified)
        # Assuming high IEC/Enforcement indicates LGU dedication, which enhances PBC
        self.perc_b_control += 0.005 * (policy_action['IEC'] + policy_action['ENFORCE'])

        # Clamp values
        self.attitude = np.clip(self.attitude, 0, 1)
        self.subj_norm = np.clip(self.subj_norm, 0, 1)
        self.perc_b_control = np.clip(self.perc_b_control, 0, 1)

    def step(self, fine_magnitude, incentive_payout, neighborhood_compliance):
        """Decision to segregate or not."""
        utility = self.calculate_utility(fine_magnitude, incentive_payout, neighborhood_compliance)

        # Decision threshold: if utility is positive, the agent segregates (is compliant)
        if utility > 0.5: # 0.5 is an arbitrary threshold; can be calibrated
            self.is_compliant = 1
        else:
            self.is_compliant = 0
            
        return self.is_compliant

In [6]:
class BarangayAgent(Agent):
    """
    Intermediate implementation layer. Receives LGU funds and manages local 
    implementation (enforcement, IEC, incentives).
    """
    def __init__(self, unique_id, model, name, households, local_budget, initial_compliance):
        super().__init__(unique_id, model)
        self.name = name
        self.households = households # List of HouseholdAgents in this barangay
        self.local_budget = local_budget # Local SWM budget (e.g., ₱30k for Liangan East)
        self.current_compliance_rate = initial_compliance
        self.last_enforcement_fined_count = 0
        self.policy_allocation = {'IEC': 0, 'ENFORCE': 0, 'INCENTIVE': 0}
        
    def aggregate_and_report(self):
        """Aggregates household compliance and calculates local policy effects."""
        
        # 1. Calculate Aggregate Compliance
        compliant_count = sum(h.is_compliant for h in self.households)
        self.current_compliance_rate = compliant_count / len(self.households)
        
        # 2. Calculate Enforcement/Fine Effectiveness
        # A compliant agent is not fined. A non-compliant agent is fined based on enforcement spending.
        non_compliant_count = len(self.households) - compliant_count
        
        # Enforcement effectiveness is proportional to spending.
        enforcement_staff = self.policy_allocation['ENFORCE'] / COST_PER_ENFORCER_QUARTER
        
        # Probability of being caught/fined (simulated based on staff count)
        fined_prob = min(1.0, 0.05 * enforcement_staff) 
        
        # Number of households fined this quarter
        fined_count = sum(1 for _ in range(non_compliant_count) if random.random() < fined_prob)
        self.last_enforcement_fined_count = fined_count

        # Calculate incentive cost and successful enforcement cost
        incentive_cost = compliant_count * PAYOUT_PER_COMPLIANT_HOUSEHOLD
        enforcement_revenue = fined_count * self.model.LGU_FINE_MAGNITUDE # Revenue from fines
        
        return self.current_compliance_rate, self.last_enforcement_fined_count, incentive_cost, enforcement_revenue

In [7]:
class LGURLAgent(Agent):
    """
    The Municipal LGU agent, which uses Q-learning to determine the optimal 
    budget allocation strategy (the action At).
    """
    def __init__(self, unique_id, model, rl_config):
        super().__init__(unique_id, model)
        self.lr = rl_config['learning_rate']
        self.gamma = rl_config['discount_factor']
        self.epsilon = rl_config['epsilon']
        self.action_space = rl_config['action_space'] # The discrete policy options
        self.q_table = defaultdict(lambda: np.zeros(len(self.action_space)))
        self.last_state = None
        self.last_action_idx = None
        
    def get_state(self, model):
        """
        Defines the State Vector St = [Compliancet, AllocationPrev, BudgetRem, Quartert]
        (Chapter 3.4.1)
        """
        # 1. Compliance (7 values)
        compliance_vector = tuple(b.current_compliance_rate for b in model.barangays.values())
        
        # 2. Previous Allocation (Simplified placeholder for 21 values)
        # Simplification: Use total spending per lever, not all 21 values, for discrete Q-Learning state
        prev_alloc_iec = sum(b.policy_allocation['IEC'] for b in model.barangays.values())
        prev_alloc_enforce = sum(b.policy_allocation['ENFORCE'] for b in model.barangays.values())
        prev_alloc_incentive = sum(b.policy_allocation['INCENTIVE'] for b in model.barangays.values())
        
        # 3. Budget Remaining (Discretized)
        budget_remaining = model.annual_budget_remaining
        budget_bin = int(budget_remaining // 100000) # Discretize in ₱100k bins
        
        # 4. Current Quarter
        quarter = model.schedule.steps % STEPS_PER_EPISODE
        
        # The state is a tuple for use as a dictionary key in the Q-table
        return compliance_vector + (budget_bin, quarter) 

    def choose_action(self, state):
        """Epsilon-greedy strategy to select one of the discrete policy options."""
        if random.random() < self.epsilon:
            # Exploration: Choose a random discrete action
            action_idx = random.randrange(len(self.action_space))
        else:
            # Exploitation: Choose the best action from Q-table
            action_idx = np.argmax(self.q_table[state])
            
        self.last_state = state
        self.last_action_idx = action_idx
        return self.action_space[action_idx]

    def update_Q(self, new_state, reward):
        """Q-Learning update rule."""
        old_value = self.q_table[self.last_state][self.last_action_idx]
        next_max = np.max(self.q_table[new_state])
        
        # Q(S, A) = Q(S, A) + LR * (Reward + Gamma * max(Q(S', a')) - Q(S, A))
        new_value = old_value + self.lr * (reward + self.gamma * next_max - old_value)
        self.q_table[self.last_state][self.last_action_idx] = new_value

In [8]:
class SWMModel(Model):
    """The main Agent-Based Model environment for SWM policy simulation."""
    def __init__(self, rl_config, fine_magnitude=500):
        super().__init__()
        self.schedule = RandomActivation(self)
        self.running = True
        self.MAX_INCOME_PROXY = 50000 # Max income proxy for sensitivity calculation
        self.ENFORCEMENT_PERCEPTION_RATE = 0.5 # Placeholder for how often households perceive enforcement
        self.IEC_EFFECTIVENESS = 0.05
        self.LGU_FINE_MAGNITUDE = fine_magnitude # ₱500 per citation (from Ordinance D)

        # RL Parameters
        self.rl_agent = LGURLAgent(999, self, rl_config)
        self.annual_budget_remaining = ANNUAL_SWM_BUDGET
        self.quarterly_budget = QUARTERLY_BUDGET
        
        # Initialization
        self.current_id = 0
        self.barangays = {}
        self.household_agents = []
        self._initialize_agents()
        
        # Data Collector setup
        self.datacollector = DataCollector(
            model_reporters={"AvgCompliance": lambda m: np.mean([b.current_compliance_rate for b in m.barangays.values()]),
                             "TotalCost": lambda m: m.last_cost,
                             "RLReward": lambda m: m.last_reward,
                             "BudgetAllocIEC": lambda m: m.last_alloc['IEC'],
                             "BudgetAllocENFORCE": lambda m: m.last_alloc['ENFORCE'],
                             "BudgetAllocINCENTIVE": lambda m: m.last_alloc['INCENTIVE'],
                            },
            agent_reporters={} # Can be used to track individual household behavior
        )
        self.last_cost = 0
        self.last_reward = 0
        self.last_alloc = {'IEC': 0, 'ENFORCE': 0, 'INCENTIVE': 0}

    def _initialize_agents(self):
        """Initializes all Household and Barangay Agents."""
        for i, (name, data) in enumerate(BARANGAY_DATA.items()):
            num_h, local_budget, initial_compliance = data
            barangay_agent = BarangayAgent(i, self, name, [], local_budget, initial_compliance)
            self.schedule.add(barangay_agent)
            self.barangays[name] = barangay_agent
            
            # Create Household Agents for this barangay
            for _ in range(num_h):
                self.current_id += 1
                # Simplified income/education proxy
                income = random.uniform(10000, self.MAX_INCOME_PROXY)
                edu_level = random.choice([0, 1, 2]) # Low, Mid, High
                h_agent = HouseholdAgent(self.current_id, self, income, edu_level, name)
                self.household_agents.append(h_agent)
                barangay_agent.households.append(h_agent)
                self.schedule.add(h_agent)

    def step(self):
        """
        Advance the model by one quarter (one RL step).
        (Chapter 3.3)
        """
        
        # --- 1. RL Agent makes Policy Decision (Action) ---
        current_state = self.rl_agent.get_state(self)
        policy_vector = self.rl_agent.choose_action(current_state) # 21-D vector
        
        # Process the 21-D action vector into 7 barangay allocations
        total_spending = 0
        policy_breakdown = defaultdict(lambda: {'IEC': 0, 'ENFORCE': 0, 'INCENTIVE': 0})
        
        # The RL action vector is structured: [IEC_B1, E_B1, I_B1, IEC_B2, ...]
        for i, b_name in enumerate(BARANGAY_DATA.keys()):
            b_agent = self.barangays[b_name]
            
            # Slice the 21-D vector (3 values per barangay)
            alloc_iec = policy_vector[i * 3 + 0]
            alloc_enforce = policy_vector[i * 3 + 1]
            alloc_incentive = policy_vector[i * 3 + 2]

            # Assign allocations to the Barangay Agent for use in the step
            b_agent.policy_allocation['IEC'] = alloc_iec
            b_agent.policy_allocation['ENFORCE'] = alloc_enforce
            b_agent.policy_allocation['INCENTIVE'] = alloc_incentive
            
            total_spending += alloc_iec + alloc_enforce + alloc_incentive

        # --- 2. Household Agents make Segregation Decisions ---
        # Run household steps first
        self.schedule.step()
        
        # --- 3. Barangay Agents Aggregate and Update ---
        total_compliance = 0
        total_households = 0
        total_fined = 0
        total_incentive_cost = 0
        total_enforcement_revenue = 0
        
        for b_agent in self.barangays.values():
            # Get neighborhood context for Household Agent updates
            neighborhood_compliance = b_agent.current_compliance_rate
            
            # Aggregation and Reporting
            compliance, fined_count, incentive_cost, enforcement_revenue = b_agent.aggregate_and_report()
            
            total_compliance += compliance * len(b_agent.households)
            total_households += len(b_agent.households)
            total_fined += fined_count
            total_incentive_cost += incentive_cost
            total_enforcement_revenue += enforcement_revenue

            # Update Household TPB constructs based on LGU action
            fined_rate = fined_count / len(b_agent.households) if len(b_agent.households) > 0 else 0
            for h_agent in b_agent.households:
                h_agent.update_tpb_constructs(b_agent.policy_allocation, fined_rate, neighborhood_compliance)

        # --- 4. RL Agent Calculates Reward and Updates Q-Table ---
        avg_compliance = total_compliance / total_households
        
        # Total cost is LGU spending plus the dynamic incentive payout
        actual_total_cost = total_spending + total_incentive_cost 
        budget_deficit = max(0, actual_total_cost - self.quarterly_budget)
        
        # Reward Function (Rt = α*Compliance - β*TotalCost - γ*BudgetDeficit)
        reward = (ALPHA_COMPLIANCE * avg_compliance) - \
                 (BETA_COST * actual_total_cost) - \
                 (GAMMA_DEFICIT * budget_deficit)
        
        new_state = self.rl_agent.get_state(self)
        self.rl_agent.update_Q(new_state, reward)
        
        # Update annual budget remaining for the next step's state
        self.annual_budget_remaining -= actual_total_cost
        
        # Reset budget and end episode if the year is over
        if self.schedule.steps % STEPS_PER_EPISODE == 0:
            self.annual_budget_remaining = ANNUAL_SWM_BUDGET
            
        # Store metrics for data collection
        self.last_cost = actual_total_cost
        self.last_reward = reward
        self.last_alloc = {'IEC': sum(b.policy_allocation['IEC'] for b in self.barangays.values()),
                           'ENFORCE': sum(b.policy_allocation['ENFORCE'] for b in self.barangays.values()),
                           'INCENTIVE': sum(b.policy_allocation['INCENTIVE'] for b in self.barangays.values())}

        self.datacollector.collect(self)

In [9]:
# --- RL Action Space Definition (Chapter 3.4.2) ---

# For discrete Q-Learning, the 21-D action space must be manageable. 
# We simplify the decision to allocation across 3 spending levels (Low, Med, High)
# for the 3 policy levers, applied uniformly to all 7 barangays initially,
# and then expanding the action space for the final analysis.

# Policy Levers and Spending Granularity (total quarterly budget ₱375,000)
# Low: ₱50k, Med: ₱125k, High: ₱200k (These are illustrative spending levels)

# Function to generate a simplified 21-D action vector for a single scenario type.
def generate_scenario_actions(iec_enabled, enforce_enabled, incentive_enabled):
    """Generates the 21-D action vectors for the specified regime."""
    # Simplified total quarterly spending options (to keep Q-table size small for example)
    spending_levels = [0, 50000, 100000] 
    
    actions = []
    
    # Iterate over all possible combinations of spending across 3 levers (IEC, ENFORCE, INCENTIVE)
    for s_iec in spending_levels:
        if not iec_enabled and s_iec > 0: continue
        for s_enforce in spending_levels:
            if not enforce_enabled and s_enforce > 0: continue
            for s_incentive in spending_levels:
                if not incentive_enabled and s_incentive > 0: continue
                
                total_alloc = s_iec + s_enforce + s_incentive
                if total_alloc <= QUARTERLY_BUDGET: # Respect budget constraint
                    
                    # Distribute this total allocation uniformly across 7 barangays
                    # In the final model, this should be a 21-D vector, but for initial training, we simplify.
                    
                    # The actual 21-D vector: [IEC_B1, E_B1, I_B1, ..., IEC_B7, E_B7, I_B7]
                    action_vector = []
                    for _ in range(NUM_BARANGAYS):
                        action_vector.extend([s_iec/NUM_BARANGAYS, s_enforce/NUM_BARANGAYS, s_incentive/NUM_BARANGAYS])
                    
                    actions.append(tuple(action_vector)) # Must be a tuple for the Q-table key
                    
    return actions

# 1. Pure Incentive Regime (IEC + Incentive only)
# Disabling ENFORCE for all 7 barangays
PURE_INCENTIVE_ACTIONS = generate_scenario_actions(True, False, True)

# 2. Pure Penalty Regime (IEC + Enforcement only)
# Disabling INCENTIVE for all 7 barangays
PURE_PENALTY_ACTIONS = generate_scenario_actions(True, True, False)

# 3. Hybrid Regime (IEC + Enforcement + Incentive)
# All levers are active
HYBRID_ACTIONS = generate_scenario_actions(True, True, True)

# Select the regime to train for in this example run
RL_ACTION_SPACE = HYBRID_ACTIONS 

RL_CONFIG = {
    'learning_rate': 0.1,
    'discount_factor': 0.9,
    'epsilon': 1.0, # Start with high exploration
    'epsilon_decay': 0.9995,
    'epsilon_min': 0.01,
    'action_space': RL_ACTION_SPACE
}

print(f"Total actions in the selected RL space: {len(RL_ACTION_SPACE)}")

Total actions in the selected RL space: 27


In [10]:
# --- SIMULATION TRAINING LOOP ---

def run_rl_training(model_config, num_episodes):
    """Runs the full RL training and returns the collected data."""
    
    model = SWMModel(model_config)
    results = []
    
    print("Starting RL Training...")
    
    for episode in range(num_episodes):
        model.annual_budget_remaining = ANNUAL_SWM_BUDGET # Reset budget annually
        
        # Decay epsilon to shift from exploration to exploitation
        model.rl_agent.epsilon = max(model.rl_agent.epsilon * model_config['epsilon_decay'], 
                                     model_config['epsilon_min'])
        
        # Run one 'episode' (4 quarters/steps)
        for _ in range(STEPS_PER_EPISODE):
            model.step()
        
        # Record final metrics for the episode
        df = model.datacollector.get_model_vars_dataframe()
        results.append(df.iloc[-1]) # Store the end-of-year state
        
        if (episode + 1) % 1000 == 0:
            print(f"Episode {episode + 1}/{num_episodes} completed. Current Epsilon: {model.rl_agent.epsilon:.4f}")
            
    return model, results


# Run the training (Set a lower number for faster testing)
# trained_model, training_results = run_rl_training(RL_CONFIG, num_episodes=5000)

# Example run with a reduced number of episodes for the notebook demonstration
trained_model, training_results = run_rl_training(RL_CONFIG, num_episodes=500) 

results_df = pd.DataFrame(training_results)

print("\n--- Training Complete ---")

# --- PLOTTING AND ANALYSIS (Chapter 3.5.3) ---

import matplotlib.pyplot as plt
import pandas as pd

# 1. Compliance Performance (Primary Metric)
plt.figure(figsize=(12, 6))
# Use rolling average to smooth the learning curve
results_df['AvgCompliance_Smoothed'] = results_df['AvgCompliance'].rolling(window=50).mean() 
plt.plot(results_df['AvgCompliance_Smoothed'])
plt.title('Maximum Sustainable Compliance (Smoothed Avg Compliance Rate)')
plt.xlabel('Episode (Simulated Year)')
plt.ylabel('Average Compliance Rate')
plt.grid(True)
plt.show()

# 2. Optimal Resource Allocation (Key Output)
final_alloc_mean = results_df.iloc[-200:].mean() # Average of the last 200 episodes (stable policy)
total_mean_spending = final_alloc_mean['BudgetAllocIEC'] + final_alloc_mean['BudgetAllocENFORCE'] + final_alloc_mean['BudgetAllocINCENTIVE']

allocations = {
    'IEC': final_alloc_mean['BudgetAllocIEC'],
    'Enforcement': final_alloc_mean['BudgetAllocENFORCE'],
    'Incentives': final_alloc_mean['BudgetAllocINCENTIVE']
}

print(f"\nOptimal Budget Allocation (Average of last 200 quarters): ₱{total_mean_spending:,.2f}")
for policy, cost in allocations.items():
    print(f"- {policy}: ₱{cost:,.2f} ({cost/total_mean_spending:.1%})")

TypeError: object.__init__() takes exactly one argument (the instance to initialize)