In [111]:
import pandas as pd
import math
import random
from collections import namedtuple
from PSSimPy.simulator import ABMSim
from PSSimPy import Bankx

# Set Up Strategic Behavior

## Original Intraday Liquidity Game

In [37]:
# Snapshot of the game as seen by the focal player. Fields, in positional order:
#   t                - current period
#   balance          - focal player's balance
#   borrowed         - focal player's borrowed amount
#   obligations      - focal player's obligations
#   estimated_others - dict or other structure approximating the other players' states
MDPState = namedtuple(
    "MDPState",
    "t balance borrowed obligations estimated_others",
)

In [123]:

class OriMDPSearch:
    """
    Single-agent MDP from the focal player's viewpoint in an n-player Intraday Liquidity Game.

    - We have n_players total.
    - Focal player is ID=0, with (balance, borrowed, obligations); its obligations
      are tracked as a single aggregate amount.
    - Each other player p in [1..n_players-1] has an estimated state:
        estimated_others[p] = {
            "balance": float,
            "borrowed": float,
            "obligations": { q: amount_that_p_owes_q, ... }
        }
    - Each simulated period (the order implemented by 'transition_function'):
      1) The focal player is charged a carry cost on borrowing held from before.
      2) All non-focal players do a local "pay if cost_of_pay <= cost_of_delay" step,
         updating their states and sending inbound payments to the focal if they owed.
      3) The focal player's balance is incremented by inbound from others.
      4) Possibly new unit obligations appear (with prob lambda_prob) for every
         ordered pair (i->j).
      5) The focal player chooses 0=delay or 1=pay for its own obligations, incurring cost.
      6) Next state is built, plus immediate cost for the focal player.

    We can also apply real-time partial observations using 'update_current_state'
    if we have external info about who paid/delayed, inbound amounts, etc.

    Randomness comes from the module-level 'random' generator, which the
    constructor seeds.
    """

    def __init__(self, 
                 n_players=4,
                 n_periods=4, 
                 lambda_prob=0.8, 
                 delta=0.2, 
                 gamma=0.1,
                 seed=42):
        """
        n_players   : total number of players, including the focal
        n_periods   : horizon for the MDP
        lambda_prob : probability new obligation arrives among pairs
        delta       : cost for delaying obligations
        gamma       : cost for borrowing shortfall or carrying borrowed
        seed        : random seed for reproducibility

        Note: this seeds the global 'random' module, so constructing an instance
        also resets the random stream seen by any other code using 'random'.
        """
        random.seed(seed)
        self.n_players = n_players
        self.n_periods = n_periods
        self.lambda_prob = lambda_prob
        self.delta = delta
        self.gamma = gamma

    # ---------------------------------------------------------------------
    # 0. PRIVATE HELPERS
    # ---------------------------------------------------------------------

    @staticmethod
    def _pay_in_full(balance, borrowed, total_due):
        """
        Settle 'total_due' out of 'balance', borrowing whatever is missing, then
        use any leftover balance to repay outstanding borrowing.

        Return (new_balance, new_borrowed, shortfall), where 'shortfall' is the
        amount that had to be newly borrowed (0 if the balance covered everything).
        """
        shortfall = max(0, total_due - balance)
        if shortfall > 0:
            new_balance = 0.0
            new_borrowed = borrowed + shortfall
        else:
            new_balance = balance - total_due
            new_borrowed = borrowed

        # leftover repays borrowed
        if new_balance > 0 and new_borrowed > 0:
            repay = min(new_balance, new_borrowed)
            new_borrowed -= repay
            new_balance -= repay

        return new_balance, new_borrowed, shortfall

    @staticmethod
    def _copy_player_state(p_state):
        """
        Copy one player's state dict, including its nested "obligations" dict,
        so that edits to the copy never reach the original.
        """
        copied = dict(p_state)
        if isinstance(copied.get("obligations"), dict):
            copied["obligations"] = dict(copied["obligations"])
        return copied

    # ---------------------------------------------------------------------
    # 1. INITIAL STATE CREATION
    # ---------------------------------------------------------------------

    def initial_state(self):
        """
        Build an initial MDPState with zero everything for the focal player,
        plus empty/zero states for each other player.
        """
        estimated_others = {}
        for p in range(1, self.n_players):
            estimated_others[p] = {
                "balance": 0.0,
                "borrowed": 0.0,
                "obligations": { q: 0.0 for q in range(self.n_players) if q != p}
            }

        return MDPState(
            t=0,
            balance=0.0,
            borrowed=0.0,
            obligations=0.0,
            estimated_others=estimated_others
        )

    # ---------------------------------------------------------------------
    # 2. COST & PAY/DELAY LOGIC FOR NON-FOCAL PLAYERS
    # ---------------------------------------------------------------------

    def carry_cost(self, borrowed):
        """
        The cost or fee for carrying 'borrowed' across a period 
        (if we want a per-period interest approach).
        Returns gamma * borrowed, or 0.0 when nothing is borrowed.
        """
        return self.gamma * borrowed if borrowed > 0 else 0.0

    def cost_of_paying_all_obligations(self, p_state):
        """
        For a non-focal or any player's perspective:
          - sum_of_oblig = sum(p_state["obligations"].values())
          - if balance < sum_of_oblig, shortfall => cost_of_pay = gamma * shortfall
          - cost_of_delay = delta * sum_of_oblig
        Return (pay_cost, delay_cost).
        Ties => pay (the code that uses this will do pay if pay_cost<=delay_cost).
        """
        sum_oblig = sum(p_state["obligations"].values())

        shortfall = max(0, sum_oblig - p_state["balance"])
        pay_cost = self.gamma * shortfall  # immediate cost for new borrowing
        delay_cost = self.delta * sum_oblig
        return pay_cost, delay_cost

    def apply_pay(self, p_state):
        """
        If a non-focal (or hypothetical focal) player decides to pay all obligations:
         1) Possibly borrow shortfall => cost= gamma*shortfall
         2) obligations -> 0
         3) leftover balance repays borrowed
         Return: (updated_p_state, actual_payments_dict, pay_cost).
         'p_state' itself is not modified.
        """
        oblig_map = dict(p_state["obligations"])
        sum_oblig = sum(oblig_map.values())

        new_balance, new_borrowed, shortfall = self._pay_in_full(
            p_state["balance"], p_state["borrowed"], sum_oblig
        )
        pay_cost = self.gamma * shortfall if shortfall > 0 else 0.0

        # every creditor r receives exactly what it was owed
        actual_payments = dict(oblig_map)

        updated_state = {
            "balance": new_balance,
            "borrowed": new_borrowed,
            "obligations": {r: 0.0 for r in oblig_map}
        }
        return updated_state, actual_payments, pay_cost

    def apply_delay(self, p_state):
        """
        If a non-focal player delays, cost = delta * sum_oblig (not
        necessarily stored for them). The obligations remain. 
        Return (unchanged_state, zero_payments, delay_cost).

        'unchanged_state' is an independent copy of 'p_state' (same values),
        so later edits to it cannot leak back into the caller's state.
        """
        oblig_map = p_state["obligations"]
        delay_cost = self.delta * sum(oblig_map.values())

        return self._copy_player_state(p_state), {r: 0.0 for r in oblig_map}, delay_cost

    def other_players_actions(self, est_others):
        """
        All non-focal players simultaneously pay or delay 
        if cost_of_pay <= cost_of_delay => pay, else delay.
        Then they update each other's balances accordingly.
        Return (updated_others, inbound_to_focal).

        Every decision is taken against the incoming 'est_others' snapshot;
        payments are credited only after all players have decided.
        'est_others' itself is not modified.
        """
        # Step A: gather each player's post-decision state and outgoing payments
        decisions = {}
        for p, pstate in est_others.items():
            pay_cost, delay_cost = self.cost_of_paying_all_obligations(pstate)
            if pay_cost <= delay_cost:
                updated_st, payments, _ = self.apply_pay(pstate)
            else:
                updated_st, payments, _ = self.apply_delay(pstate)
            decisions[p] = (updated_st, payments)

        # Step B: apply all payments simultaneously
        new_others = {p: dict(updated_st) for p, (updated_st, _) in decisions.items()}

        inbound_to_focal = 0.0
        for updated_st, payments in decisions.values():
            for r, amt in payments.items():
                # payments to the focal (r == 0) are tallied separately below
                if amt > 0 and r != 0:
                    new_others[r]["balance"] += amt
            inbound_to_focal += payments.get(0, 0.0)

        return new_others, inbound_to_focal

    # ---------------------------------------------------------------------
    # 3. RANDOM OBLIGATIONS AMONG ALL PAIRS
    # ---------------------------------------------------------------------

    def maybe_new_obligations(self, state):
        """
        Randomly add +1 obligation for pairs (i->j) with prob lambda_prob.
        If i=0 => focal owes j. If j=0 => i owes focal. Else i & j are in estimated_others.
        Return (new_focal_obl, new_others_obl).

        Only 'state.obligations' and 'state.estimated_others' are read, and
        'state' is not modified. One random draw is made per ordered pair,
        i in the outer loop and j in the inner loop.
        """
        updated_others = {}
        for p, pstate in state.estimated_others.items():
            updated_others[p] = {
                "balance": pstate["balance"],
                "borrowed": pstate["borrowed"],
                "obligations": dict(pstate["obligations"])
            }

        new_focal_obl = state.obligations

        # For all distinct i->j
        for i in range(self.n_players):
            for j in range(self.n_players):
                if i == j:
                    continue
                # With prob lambda_prob => i->j +1 obligation
                if random.random() < self.lambda_prob:
                    if i == 0:
                        # focal owes j (tracked only as an aggregate)
                        new_focal_obl += 1.0
                    else:
                        # i owes j (j may be the focal, id 0). A missing entry
                        # counts as zero so a partially specified obligations
                        # map does not raise KeyError.
                        owed = updated_others[i]["obligations"]
                        owed[j] = owed.get(j, 0.0) + 1.0

        return new_focal_obl, updated_others

    # ---------------------------------------------------------------------
    # 4. TRANSITION FUNCTION FOR FOCAL PLAYER'S MDP
    # ---------------------------------------------------------------------

    def transition_function(self, state, focal_action):
        """
        Return a list of (next_state, prob, immediate_cost) from 'state' 
        given the focal player picks action in {0=delay,1=pay}.
        Any action other than 1 is treated as delay.

        We do a single branch (prob=1.0) for updated environment logic:
         - pay carry cost for the focal's borrowed
         - let other players pay/delay
         - inbound => focal.balance
         - new obligations appear
         - focal does pay/delay => cost

        The new obligations are drawn with 'random', so the single branch is
        one sample of the stochastic environment, not an enumeration of outcomes;
        repeated calls with the same arguments may return different next states.
        """
        if state.t >= self.n_periods:
            # Terminal
            return [(state, 1.0, 0.0)]

        # 1) carry cost for the focal's borrowed from prev period
        carry_cost = self.carry_cost(state.borrowed)

        # 2) other players act => inbound_to_focal
        updated_others, inbound_to_focal = self.other_players_actions(state.estimated_others)

        # 3) add inbound to focal's balance
        mid_balance = state.balance + inbound_to_focal

        # 4) random new obligations
        new_focal_obl, new_others_obl = self.maybe_new_obligations(
            MDPState(state.t, mid_balance, state.borrowed, state.obligations, updated_others)
        )

        # 5) focal player's chosen action
        if focal_action == 1:  # pay
            new_balance, new_borrowed, shortfall = self._pay_in_full(
                mid_balance, state.borrowed, new_focal_obl
            )
            borrow_cost = self.gamma * shortfall if shortfall > 0 else 0.0
            next_st = MDPState(
                t=state.t+1,
                balance=new_balance,
                borrowed=new_borrowed,
                obligations=0.0,
                estimated_others=new_others_obl
            )
            return [(next_st, 1.0, carry_cost + borrow_cost)]

        # action=0 => delay: obligations stay outstanding and are charged delta each
        delay_cost = self.delta * new_focal_obl
        next_st = MDPState(
            t=state.t+1,
            balance=mid_balance,
            borrowed=state.borrowed,
            obligations=new_focal_obl,
            estimated_others=new_others_obl
        )
        return [(next_st, 1.0, carry_cost + delay_cost)]

    def actions(self, state):
        """
        The focal player's possible actions: 0=delay, 1=pay.
        """
        return [0, 1]

    # ---------------------------------------------------------------------
    # 5. STATE-TO-KEY & DEPTH-LIMITED VALUE
    # ---------------------------------------------------------------------

    def state_to_key(self, state):
        """
        Convert 'estimated_others' into a hashable structure 
        so we can memoize in depth_limited_value.

        Balances and borrowed amounts are rounded to 4 decimals; the other
        players' obligation amounts are used as-is.
        """
        def freeze_obligations(oblig_dict):
            return tuple(sorted(oblig_dict.items()))

        frozen_others_list = []
        for p in sorted(state.estimated_others.keys()):
            pst = state.estimated_others[p]
            frozen_others_list.append(
                (p,
                 round(pst["balance"],4),
                 round(pst["borrowed"],4),
                 freeze_obligations(pst["obligations"]))
            )

        return (
            state.t,
            round(state.balance,4),
            round(state.borrowed,4),
            round(state.obligations,4),
            tuple(frozen_others_list)
        )

    def depth_limited_value(self, state, depth, memo=None):
        """
        Depth-limited search for the focal player's best action
        from 'state' up to 'depth' steps into the future.
        Returns (best_value, best_action):
          best_value is the maximum expected reward (i.e. -cost),
          best_action in {0=delay, 1=pay}.
        At depth <= 0 or once the horizon is reached, returns (0.0, None).

        Results are memoized per (state key, depth). Because each transition is
        a random sample, the returned value is an estimate along sampled paths,
        and a memoized entry reflects the first sample seen for that key.
        Ties between actions are resolved in favor of the earlier action (delay).
        """
        if memo is None:
            memo = {}

        # Base case
        if depth <= 0 or state.t >= self.n_periods:
            return (0.0, None)

        key = (self.state_to_key(state), depth)
        if key in memo:
            return memo[key]

        best_value = float('-inf')
        best_action = None

        for a in self.actions(state):
            # transitions typically has 1 entry: (next_st, 1.0, immediate_cost)
            transitions = self.transition_function(state, a)
            total_val = 0.0
            for (ns, prob, cost) in transitions:
                immediate_reward = -cost
                future_val, _ = self.depth_limited_value(ns, depth-1, memo)
                total_val += prob * (immediate_reward + future_val)

            if total_val > best_value:
                best_value = total_val
                best_action = a

        memo[key] = (best_value, best_action)
        return memo[key]

    # ---------------------------------------------------------------------
    # 6. UPDATE CURRENT STATE (MANUAL PARTIAL OBSERVATION APPROACH)
    # ---------------------------------------------------------------------

    def update_current_state(self, 
                             current_state, 
                             focal_action, 
                             partial_observations):
        """
        Update 'current_state' manually given a chosen focal_action 
        and partial info about inbound payments or new obligations, 
        plus any updates to other players.

        partial_observations might look like:
        {
          "inbound_payments": 2.0,
          "arrived_obligations": 1,
          "other_updates": {
              1: {"balance": 2.5, "borrowed":0.0, "obligations":{...}},
              ...
          }
        }
        Missing keys default to no inbound, no new obligations and no updates.
        Updates for player ids not present in 'estimated_others' are ignored.

        Returns the next MDPState with t+1, new balance/borrowed/obligations
        after applying the focal_action. This is an alternative to 
        the 'transition_function' if you're doing real-time partial updates.
        No cost is computed or stored here.

        Neither 'current_state' nor 'partial_observations' is modified: the
        other players' states are copied before the updates are merged in.
        """
        # 1) inbound from partial obs
        inbound = partial_observations.get("inbound_payments", 0.0)
        new_balance_pre = current_state.balance + inbound

        # 2) new obligations
        newly_arrived = partial_observations.get("arrived_obligations", 0)
        new_obligations = current_state.obligations + newly_arrived

        # 3) merge other_updates into a private copy of estimated_others
        #    (a shallow dict() copy would share the per-player dicts and let the
        #    merge below rewrite the previous state in place)
        updated_others = {
            pid: self._copy_player_state(pst)
            for pid, pst in current_state.estimated_others.items()
        }
        other_updates = partial_observations.get("other_updates", {})
        for pid, newinfo in other_updates.items():
            if pid not in updated_others:
                continue
            for k, v in newinfo.items():
                # copy dict values (e.g. "obligations") so the new state does
                # not alias the caller's observation object
                updated_others[pid][k] = dict(v) if isinstance(v, dict) else v

        # 4) apply focal_action
        if focal_action == 1:  # pay
            # immediate cost would be self.gamma*shortfall, not stored in state
            new_balance, new_borrowed, _ = self._pay_in_full(
                new_balance_pre, current_state.borrowed, new_obligations
            )
            return MDPState(
                t=current_state.t+1,
                balance=new_balance,
                borrowed=new_borrowed,
                obligations=0.0,
                estimated_others=updated_others
            )

        # delay => new_obligations remain
        return MDPState(
            t=current_state.t+1,
            balance=new_balance_pre,
            borrowed=current_state.borrowed,
            obligations=new_obligations,
            estimated_others=updated_others
        )

In [None]:
# Smoke test: build a 10-player, 10-period game and run one depth-limited
# search from the all-zero initial state.

# The seed is drawn at random, so results vary from run to run. It is kept in a
# variable and printed below so that a particular run can be reproduced.
smoke_seed = random.randint(0,400)

mdp = OriMDPSearch(
    n_players=10,
    n_periods=10,
    lambda_prob=0.8,
    delta=0.2,   # cost for delaying
    gamma=0.4,    # cost for borrowing
    seed=smoke_seed
)

init_s = mdp.initial_state()

# Optional experiment: tally the recommended first action over 100 searches.
# num_delay = 0
# num_pay = 0
# for _ in range(100):
#     best_val, best_act = mdp.depth_limited_value(init_s, depth=10)
#     if best_act == 0:
#         num_delay += 1
#     else:
#         num_pay += 1

# print(f"Played 'Delay': {num_delay}")
# print(f"Played 'Pay': {num_pay}")

best_val, best_act = mdp.depth_limited_value(init_s, depth=10)
# OriMDPSearch has no 'current_state' attribute; report the search result instead.
print(f"seed={smoke_seed}  best_value={best_val}  best_action={best_act}")

Played 'Delay': 96
Played 'Pay': 4


In [None]:
# Set up strategic bank agent
class OriStrategicBank(Bank):
    """
    Bank agent intended to play the original intraday liquidity game strategy.

    Currently a stub: 'strategy' has no implementation yet.
    """
    # NOTE(review): the notebook's import cell imports `Bankx` from PSSimPy, not
    # `Bank`; confirm the import so this base class resolves on a fresh kernel.

    def __init__(self, name, strategy_type='OriStrategic', **kwargs):
        """
        Forward 'name', 'strategy_type' and any extra keyword arguments to the
        base class constructor.
        """
        super().__init__(name, strategy_type, **kwargs)

    # overwrite strategy
    def strategy(
        self,
        txns_to_settle: set,
        all_outstanding_transactions: set,
        sim_name: str,
        day: int,
        current_time: str,
        queue,
    ) -> set:
        """
        Placeholder for the strategic settlement decision.

        TODO: not implemented. Returns None although the annotation says 'set';
        presumably the simulator expects the transactions to settle. Verify
        against the base class contract before use.
        """
        return None