In [None]:
#| default_exp methods

In [None]:
#| hide
#| export
from gh_pages_example.utils import *
from gh_pages_example.types import *

import functools
import typing

import fastcore.test
from nbdev.showdoc import *
import numpy as np
import nptyping

In [None]:
np.set_printoptions(suppress = True)

# Methods in Evolutionary Game Theory

> A set of methods for solving Evolutionary Games (see Nowak 2006 and the references section)

## Evolutionary Dynamics in Finite Populations

We examine a finite population of players using different strategies who engage in social learning.

In the limit of small mutations, most of the time everyone plays the same strategy. States in which everyone plays the same strategy are known as **monomorphic states**. Occasionally, mutant strategies can fixate in the population, resulting in everyone adopting the same new strategy. We can use Markov Chains to analyse the relative frequencies with which each strategy is played by the population.

The steps for computing the ergodic (i.e. long-run, stationary) strategy distribution are as follows:

1. Build a transition matrix between all monomorphic states
2. Find the ergodic distribution for the markov chain defined using this transition matrix

### Fermi social learning

> A Fermi social learning rule means that individuals make pairwise comparisons between their own strategy and another strategy in the population that they may choose to copy.

#### Derivation

Each period of the evolutionary game involves individuals being randomly selected to play against one another individual.

Letting $Z$ denote the size of the population, and $π$ denote the game's payoff matrix, we can compute the fitness of a strategy, $B$ for example, when $k$ individuals are of type $B$ as follows:

\begin{equation}
ΠB_k = πBA \frac{Z-k}{Z - 1} + πBB \frac{k-1}{Z- 1}
\end{equation}

where $πBA$ and $πBB$ are the payoffs for playing $B$ against type $A$ or $B$ respectively.

The **Fermi social learning rule** adopts strategy $B$ selected from the population over their current strategy $A$ with probability given by:

\begin{equation}
Pr(adopt \, B | k) = \frac{1}{(1 + \exp^{-\beta (ΠB_k - ΠA_k)})}
\end{equation}

where $ΠB_k - ΠA_k$ is the relative fitness of strategy $B$ over $A$ in a population with $k$ individuals of type $B$, the rest of type $A$. Notice how the larger the relative fitness, the closer the denominator, and therefore the probability, is to $1$.

Using the Fermi social learning rule above, we can write the probability of increasing the number of type $B$ individuals as

\begin{equation}
T^+_B(k) = \frac{Z-k}{Z} \frac{k}{Z} Pr(adopt \, B | k) 
\end{equation}
as an individual of type $A$ needs to randomly be chosen to compare their strategy against someone of type $B$.

and the probability of decreasing the number of type $B$ individuals as

\begin{equation}
T^-_B(k) = \frac{k}{Z} \frac{Z-k}{Z} Pr(adopt \, A | k) 
\end{equation}

as an individual of type $B$ needs to randomly be chosen to compare their strategy against someone of type $A$.

We will often employ their ratio, which is: 

\begin{equation}
\frac{T^-_B(k)}{T^+_B(k)} = \frac{Pr(adopt \, A | k) }{Pr(adopt \, B | k)} = \frac{1 + \exp^{-\beta (ΠB_k - ΠA_k)}}{1 + \exp^{-\beta (ΠA_k - ΠB_k)}}
\end{equation}

Notice that $\frac{1 + \exp^x}{1 + \exp^{-x}} = \exp^{x}$

So, this ratio simplifies to $\frac{T^-_B(k)}{T^+_B(k)} =  \exp^{-\beta (ΠB_k - ΠA_k)}$


#### Definition

In [None]:
#| export

def fermi_learning(fitnessA:nptyping.NDArray, # fitness of strategy A
                   fitnessB:nptyping.NDArray, # fitness of strategy B
                   β:nptyping.NDArray, # learning rate
                  ) -> nptyping.NDArray:
    """Fermi function: probability that a player currently using strategy A
    switches to strategy B, given the two fitnesses and the learning rate β.

    Evaluates 1 / (1 + exp(-β (fitnessB - fitnessA))) elementwise, so equal
    fitnesses give 0.5 whatever the value of β.
    """
    # Fitness advantage of B over A; positive values favour adopting B.
    fitness_gap = fitnessB - fitnessA
    boltzmann_weight = np.exp(-β * fitness_gap)
    return (1 + boltzmann_weight) ** (-1)

#### Examples and Tests

When each strategy has the same fitness, then the likelihood that a player adopts strategy $B$ is 50%, no matter the value of $\beta$.

In [None]:
x = fermi_learning(np.array([5]),
                   np.array([5]),
                   np.array([1]),)
nptyping.assert_isinstance(x, nptyping.NDArray[nptyping.Shape["1"], typing.Any])
fastcore.test.test_eq(x, 0.5)

### Fixation rate

> The fixation rate for type B in a population of type A, $\rho$, is defined as the probability that the appearance of a mutant of type B leads to the entire population adopting type B instead of A, i.e. what is the likelihood that a mutant of type B invades population A.

#### Derivation


A derivation of the fixation rate defined below can be found in Nowak 2006 (reproduced below).

> Consider a one-dimensional stochastic process on a discrete state space, $ i \in \{0, 1, \cdots, N\}$ that represents the number of individuals in a population of $N$ individuals who are of type $B$, the rest are type $A$.
>
> In each stochastic event, the number of individuals of type $B$ can at most increase or decrease by 1.
>
> For a given number of individuals, $i$, let $a_i$, $b_i$, and $1 - a_i - b_i$ represent the chance of an increase, decrease, or no change in $i$.
> 
> This stochastic process follows the transition matrix, $P$ (*not to be confused with the transition matrices we discuss elsewhere!*)
>
>
> \begin{equation}
P \, = \, \begin{pmatrix}
1 & 0 & 0 & \cdots & 0 & 0 & 0\\
b_1 & (1 - a_1 - b_1) & a_1 & \cdots & 0 & 0 & 0\\
\vdots & \vdots & \vdots & \ddots & \vdots & \vdots & \vdots\\
0 & 0 & 0 & \cdots & b_{N-1} & (1 - a_{N-1} - b_{N-1}) & a_{N-1}\\
0 & 0 & 0 & \cdots & 0 & 0 & 1\\
\end{pmatrix}
\end{equation}
>
> Denote by $x_i$ the probability of reaching state $N$ when starting from $i$.
>
> From transition matrix $P$ above, we can see that $x_i$ must satisfy:
>
> $x_0 = 0$
>
> $x_i = b_i x_{i-1} + (1 - a_i - b_i) x_i + a_i x_{i+1}$
>
> $x_N = 1$
>
> The fixation rate for a mutant B in a population of type A is clearly $x_1$
>
> We can solve for $x_i$ by rewriting the above as $b_i x_i - b_i  x_{i-1} = a_i x_{i+1} - a_i x_i$.
> 
> We can denote $y_i = x_i - x_{i-1}$ to simplify the above to $y_{i+1} = \frac{b_i}{a_i} y_i$
>
> Notice that $\sum_{i=1}^N{y_i} = x_N - x_0 = 1$ and that $y_1 = x_1$
>
> We can use the above to write
\begin{equation}
x_1 + {\sum_{i=2}^N{y_i}} = x_1 (1 + {\sum_{i=1}^{N-1}{\prod_{j=1}^{i} \frac{b_j}{a_j}}}) = 1
\end{equation}
>
> And so
\begin{equation}
x_1 = \frac{1}{(1 + \sum_{i=1}^{N-1}{\prod_{j=1}^{i} \frac{b_j}{a_j}})}
\end{equation}
>
> Note that $x_1$ is the fixation rate for a mutant $B$ in a population of type $A$, often denoted as $\rho$.
>
> *Also note that $1 - x_{N-1}$ is the fixation rate for a mutant $A$ in a population of type $B$. We could find expressions for all $x_i$ if we note that $x_i = x_1 (1 + \sum_{j=1}^{i-1}{\prod_{k=1}^{j} \frac{b_k}{a_k}})$ (see Nowak 2006 for further details).*

We can use our definitions above to determine when the fixation rate for a mutant $B$ in a population of type $A$ is greater than that for a mutant $A$ in a population of type $B$. 

This condition requires that $x_1 > 1 - x_{N-1}$, i.e. $\frac{1}{(1 + \sum_{i=1}^{N-1}{\prod_{j=1}^{i} \frac{b_j}{a_j}})} > \frac{\prod_{j=1}^{N-1} \frac{b_j}{a_j}}{(1 + \sum_{i=1}^{N-1}{\prod_{j=1}^{i} \frac{b_j}{a_j}})}$.

Using the fermi social learning rule and the aforementioned simplifications, we can see that this condition holds true whenever $1 > e^{-\beta \sum_{j=1}^{N-1}{(\Pi_B(j) - \Pi_A(j))}}$ which implies $\sum_{j=1}^{N-1}{\Pi_B(j)} > \sum_{j=1}^{N-1}{\Pi_A(j)}$.

Lastly, we can make use of the equation $\sum_{j=1}^{N-1}{j}=\frac{(N-1) N}{2}$ to simplify this condition to $N \pi_{BA} + (N-2) \pi_{BB} > (N-2) \pi_{AA} + N \pi_{AB}$, which for large $N$ reduces to $\pi_{BA} + \pi_{BB} > \pi_{AA} + \pi_{AB}$

This is exactly the risk dominance condition implied by 2 by 2 payoff matrices. The risk dominance condition has been used in the literature to offer a reason to motivate selecting one monomorphic equilibrium over another in such games. In such games there is a precise connection between risk dominance and the monomorphic equilibria selected for by social learning. This connection disappears in games with larger payoff matrices (which is why theorists tend to consider the concept of stochastic stability instead, perhaps using Young's method (Young 2003)).

Even in games with more than 2 players (or populations), we can make use of this condition to tell us in which direction the fixation rate is stronger between two strategies. At times, this is enough to gain an intuition for the gradient of selection present in polymorphic states where multiple strategies coexist in one or more populations.

#### Definition

In [None]:
#| export

# Type alias for the transition-probability arguments of `fixation_rate`:
# a list of NumPy arrays, each annotated with one dimension named "N_models".
T_type = list[nptyping.NDArray[nptyping.Shape["N_models"], typing.Any]]

def fixation_rate(Tplus: T_type, # A list of NDarrays, one array (of size n_models) for each possible number of mutants in the population; the probability of gaining one mutant
                  Tneg: T_type, # A list of NDarrays, one array (of size n_models) for each possible number of mutants in the population; the probability of losing one mutant
                 ) -> nptyping.NDArray[nptyping.Shape["N_models"], typing.Any]: # Fixation rates for the given strategy in each model
    """Calculate the likelihood that a mutant invades the population.

    Evaluates ρ = 1 / (1 + Σ_{i=1}^{Z-1} Π_{j=1}^{i} Tneg[j-1] / Tplus[j-1])
    with Z = len(Tplus) + 1, independently for every model (array entry).

    The prefix products are accumulated once with `np.cumprod` rather than
    being rebuilt for every `i`, so the work is linear in Z instead of
    quadratic. Entries of `Tplus` equal to zero still lead to division by
    zero, exactly as in the textbook formula.
    """
    # Ratio of losing to gaining a mutant at each mutant count k = 1, ..., Z-1.
    ratios = [Tneg[k] / Tplus[k] for k in range(len(Tplus))]
    # Row i holds the product of the first i+1 ratios.
    prefix_products = np.cumprod(ratios, axis=0)
    ρ = (np.sum(prefix_products, axis=0) + 1)**-1
    return ρ

In [None]:
show_doc(fixation_rate)

---

[source](https://github.com/PaoloBova/gh-pages-example/blob/main/gh_pages_example/methods.py#L32){target="_blank" style="float:right; font-size:smaller"}

### fixation_rate

>      fixation_rate (Tplus:list[nptyping.base_meta_classes.NDArray],
>                     Tneg:list[nptyping.base_meta_classes.NDArray])

Calculate the likelihood that a mutant invades the population.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| Tplus | list | A list of NDarrays, one array (of size n_models) for each possible number of mutants in the population; the probability of gaining one mutant |
| Tneg | list | A list of NDarrays, one array (of size n_models) for each possible number of mutants in the population; the probability of losing one mutant |
| **Returns** | **NDArray** | **Fixation rates for the given strategy in each model** |

#### Examples and Tests

When the chance of gaining a mutant always equals the chance of losing a mutant, then the fixation rate will be $\frac{1}{Z}$

Note that because we have to sample the population for a mutant and the player of the type being invaded, the chance of gaining or losing a mutant can be no greater than $\frac{k}{Z} \frac{Z-k}{Z}$

In [None]:
Z = 2 # With Z=2, we only need to evaluate Tplus and Tneg for when k=1
Tplus_example = [np.array([1/8])]
Tneg_example =  [np.array([1/8])]

In [None]:
#|  hide
# validate test inputs
assert len(Tplus_example) == len(Tneg_example)
for tplus, tneg in zip(Tplus_example, Tneg_example):
    assert tplus.shape == tneg.shape

In [None]:
fixation_rate_result = fixation_rate(Tplus_example, Tneg_example)

In [None]:
#| hide
nptyping.assert_isinstance(fixation_rate_result,
                           nptyping.NDArray[nptyping.Shape["1"], typing.Any])

True

In [None]:
fastcore.test.test_eq(fixation_rate_result, np.array([0.5]))

When the chance of gaining a mutant is half the chance of losing a mutant, then the fixation rate will be

\begin{equation}
\rho = \frac{1}{(1 + \sum_{j=1}^{Z-1}{2^j})}
\end{equation}

When $Z=2$, we have $\rho = \frac{1}{3}$

In [None]:
Z = 2 # With Z=2, we only need to evaluate Tplus and Tneg for when k=1
Tplus_example = [np.array([0.1])]
Tneg_example =  [np.array([0.2])]

In [None]:
#|  hide
# validate test inputs
assert len(Tplus_example) == len(Tneg_example)
for i, tplus in enumerate(Tplus_example):
    assert tplus.shape == Tneg_example[i].shape

In [None]:
fixation_rate_result = fixation_rate(Tplus_example, Tneg_example)

In [None]:
fastcore.test.test_eq(fixation_rate_result, np.array([1/3]))

In [None]:
#| hide
nptyping.assert_isinstance(fixation_rate_result,
                           nptyping.NDArray[nptyping.Shape["1"], typing.Any])

True

We could instead consider an example where we have a mutant Defector (D) who appears in a population of Cooperators (C) playing a standard Prisoner's Dilemma.

We will consider an example of such a scenario where the chance of gaining/losing a D player is given by $\frac{1}{1 + e^{\pm \beta \frac{Z+1}{Z-1}}}$.

The fixation rate will be given by the following expression:

\begin{equation}
\rho = \frac{1}{1 + \sum_{j=1}^{Z-1}{(\frac{1 + e^{- \beta \frac{Z+1}{Z-1}}}{1 + e^{\beta \frac{Z+1}{Z-1}}})^j}}
\end{equation}

For this example, we will let $\beta=1$ and $Z=10$, so $\beta \frac{Z+1}{Z-1} = \frac{11}{9}$.

In [None]:
β = 1
Z = 10
ρ_CD = 1 / (1 + sum((1 + np.exp(- β * (Z + 1) / (Z-1)))**j 
                    / (1 + np.exp(β * (Z + 1) / (Z-1)))**j
                    for j in range(1, Z)))
Tplus_example = [np.array([1 / (1 + np.exp(- β * (Z + 1) / (Z-1)))])
                 for _ in range(Z-1)]
Tneg_example =  [np.array([1 / (1 + np.exp(β * (Z + 1) / (Z-1)))])
                 for _ in range(Z-1)]

In [None]:
#|  hide
# validate test inputs
assert len(Tplus_example) == len(Tneg_example)
for i, tplus in enumerate(Tplus_example):
    assert tplus.shape == Tneg_example[i].shape

In [None]:
#| hide
nptyping.assert_isinstance(fixation_rate(Tplus_example, Tneg_example),
                           nptyping.NDArray[nptyping.Shape["1"], typing.Any])

True

In [None]:
# `is_close` only returns a bool; `test_close` raises if the values differ,
# so this cell now fails when the fixation rate is wrong.
fastcore.test.test_close(fixation_rate(Tplus_example, Tneg_example), ρ_CD)

True

Finally, it is useful to know how the fixation rate behaves when any elements of Tplus are zero (as the fixation rate divides by those elements). Even though the Fermi learning rule we use theoretically gives a number between 0 and 1 exclusive, in practice the number may underflow to a 0 if low enough. This will cause unexpected behaviour if we allow it in our algorithm for computing the transition matrix.

We can avoid this issue by using a slightly altered method for calculating the fixation rate, taking advantage of our choice to use the `fermi_learning` rule.

In the above fixation rate calculations we used the `fermi_learning` function to calculate the probability of a player with strategy $D$ adopting strategy $C$ (and likewise for the probability of a player with $C$ adopting $D$). Their ratio takes the form, $\frac{1 + e^x}{1 + e^{-x}}$. It is not too hard to verify that $\frac{1 + e^x}{1 + e^{-x}} = e^x$.

Moreover, we can avoid taking the product of the ratios at all, since the product of exponentials (with the same base) is just the exponential of the sum of their exponents.

By using the above substitution and algebraic manipulation, we can substantially mitigate the numerical stability issues. For this reason, we will not use `fermi_learning` nor `fixation_rate` in our algorithm at all (although in most cases we would expect these methods to yield the same answers). Instead, we will use `fixation_rate_stable`.

In [None]:
#| export
def fixation_rate_stable(ΠA:list, # Average payoffs of the resident strategy A, one entry for each number of B mutants in the population
                         ΠB:list, # Average payoffs of the mutant strategy B, one entry for each number of B mutants in the population
                         β:Array1D, # learning rate 
                        ):
    """Calculate the likelihood that a mutant B invades population A
    using a numerically stable method.

    Evaluates ρ = 1 / (1 + Σ_{i=1}^{Z-1} exp(-β Σ_{j=1}^{i} (ΠB[j-1] - ΠA[j-1])))
    with Z = len(ΠA) + 1. Working with sums of exponents avoids dividing
    adoption probabilities that may have underflowed to zero.

    The running sums are accumulated once with `np.cumsum` instead of being
    rebuilt for every `i`, so the work is linear in Z rather than quadratic.
    Raises an AssertionError (via `fastcore.test.test_eq`) when the two
    payoff lists differ in length.
    """
    fastcore.test.test_eq(len(ΠA), len(ΠB))
    # Exponent contributed by each mutant count k = 1, ..., Z-1.
    exponents = [-β*(ΠB[k] - ΠA[k]) for k in range(len(ΠA))]
    # Row i holds the sum of the first i+1 exponents.
    running_exponents = np.cumsum(exponents, axis=0)
    # Clip before exponentiating to avoid underflow/overflow warnings.
    terms = np.exp(np.clip(running_exponents, -500, 500))
    ρ = (np.sum(terms, axis=0) + 1)**-1
    return ρ

We can see in the examples which follow that both methods usually give the same answers.

To match an earlier example where `Tplus` and `Tneg` were both equal to $\frac{1}{8}$ (as $Z=2$ we only need to consider one value for each when $k=1$), we let $\beta=1$ and recall that $T^+_B(k) = \frac{Z-k}{Z} \frac{k}{Z} Pr(adopt \, B | k) = \frac{Z-k}{Z} \frac{k}{Z} \frac{1}{1 + \exp^{-\beta (ΠB(k) - ΠA(k))}} $

We can then say that $ΠA - ΠB = \log{(\frac{1}{\frac{4}{8}} - 1)} = \log{\frac{4}{4}} = \log{4} - \log{4}$

Notice that to achieve neutral drift, the payoffs have to be equal.

In [None]:
Z = 2
β = 1
ΠA = [np.array([np.log(4)])]
ΠB = [np.array([np.log(4)])]
result = fixation_rate_stable(ΠA, ΠB, β)
fastcore.test.test_close(result, 0.5)

We can also consider an example from a payoff matrix I've run into in practise.

In [None]:
payoffs = np.array([[51, 0.6, 51],
                    [114.3, 57.75, 39.38],
                    [51, 0.99798, 51]])

We are interested in the fixation rate of a mutant B in a population of A

Strategy A is the strategy represented by row 3

Strategy B is the strategy represented by row 2


In [None]:
Z = 100
β = 1

We need only the average payoffs for the stable calculation.

In [None]:
ΠA = [k/(Z-1) * payoffs[2,1] + (Z-k-1)/(Z-1) * payoffs[2,2]
      for k in range(1, Z)]
ΠB = [(k-1)/(Z-1) * payoffs[1,1] + (Z-k)/(Z-1) * payoffs[1,2]
      for k in range(1, Z)]

result_stable = fixation_rate_stable(ΠA, ΠB, β)

We also need the adoption rates for the unstable calculation

In [None]:
# Adoption probabilities at each mutant count: Tneg is the chance that a
# B player copies A, Tplus the chance that an A player copies B.
Tneg = [fermi_learning(payoff_b, payoff_a, β)
        for payoff_a, payoff_b in zip(ΠA, ΠB)]
Tplus = [fermi_learning(payoff_a, payoff_b, β)
         for payoff_a, payoff_b in zip(ΠA, ΠB)]

# Naive and unstable calculation
result_unstable = fixation_rate(Tplus, Tneg)

In [None]:
fastcore.test.test_close(result_stable, 0)
fastcore.test.test_close(result_unstable, 0)

### Build transition matrix

Recall that step 1 of finding the solution to the Evolutionary Game dynamics is to build a transition matrix between all monomorphic states. 

The transition matrix captures the probability that the population of the Evolutionary Game transitions from one monomorphic state to another. We read an entry of the transition matrix as the probability of transitioning from the row state to the column state.

In [None]:
#| export
class ModelTypeEGT():
    """This is the schema for an Evolutionary Game Theory model.
    
    Note: This schema is not enforced and is here purely for documentation
    purposes. The constructor accepts the parameters listed below but does
    not store any of them."""
    def __init__(self, 
                 Z: int, # the size of the population
                 strategy_set: list[str], # the set of strategies in the model
                 β: Array1D, # the learning rate
                 payoffs: Array3D, # the payoffs of the game
                 transition_matrix: Array3D=None, # the model's transition matrix
                 ergodic: Array2D=None, # ergodic distribution of the model's markov chain
                ):
        # Intentionally a no-op: the signature only documents the entries
        # that the `models` dictionaries used in this module are expected
        # to contain.
        pass

In [None]:
show_doc(ModelTypeEGT)

---

[source](https://github.com/PaoloBova/gh-pages-example/blob/main/gh_pages_example/methods.py#L69){target="_blank" style="float:right; font-size:smaller"}

### ModelTypeEGT

>      ModelTypeEGT (Z:int, strategy_set:list[str],
>                    β:gh_pages_example.types.Array1D,
>                    payoffs:gh_pages_example.types.Array3D,
>                    transition_matrix:gh_pages_example.types.Array3D=None,
>                    ergodic:gh_pages_example.types.Array2D=None)

This is the schema for an Evolutionary Game Theory model.

Note: This schema is not enforced and is here purely for documentation
purposes.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| Z | int |  | the size of the population |
| strategy_set | list |  | the set of strategies in the model |
| β | Array1D |  | the learning rate |
| payoffs | Array3D |  | the payoffs of the game |
| transition_matrix | Array3D | None | the model's transition matrix |
| ergodic | Array2D | None | ergodic distribution of the model's markov chain |

In [None]:
#| export
#| hide
@multi
def build_transition_matrix(models:dict # A dictionary that contains the parameters in `ModelTypeEGT`
                           ):
    """Build a transition matrix between all monomorphic states using the
    fermi social learning rule.

    This is the dispatch function: it returns the 'dispatch-type' entry of
    `models` (None when the key is absent).
    """
    # NOTE(review): presumably `multi` uses this value to pick the registered
    # implementation - confirm against gh_pages_example.utils.
    dispatch_key = models.get('dispatch-type')
    return dispatch_key
    

@method(build_transition_matrix)
def build_transition_matrix(models:dict # A dictionary that contains the parameters in `ModelTypeEGT`
                           ):
    """Construct, for each model, the transition matrix between all
    monomorphic states under the fermi social learning rule.

    Fixation rates come from `fixation_rate_stable`. Returns a copy of
    `models` with the result stored under "transition_matrix", an array of
    shape (n_models, n_strategies, n_strategies).
    """
    Z = models['Z']
    S = models['strategy_set']
    β = models['β']
    π = models['payoffs']
    n_strategies = len(S)
    M = np.zeros((π.shape[0], n_strategies, n_strategies))
    for resident in range(n_strategies):
        for invader in range(n_strategies):
            if invader == resident:
                # The diagonal starts at 1; each off-diagonal entry of the
                # row is subtracted from it below so the row sums to 1.
                M[:, resident, resident] += 1
                continue
            πAA = π[:, resident, resident]
            πAB = π[:, resident, invader]
            πBA = π[:, invader, resident]
            πBB = π[:, invader, invader]
            # Average payoffs of resident (A) and invader (B) when k
            # individuals play B, for k = 1, ..., Z-1.
            ΠA = []
            ΠB = []
            for k in range(1, Z):
                ΠA.append(πAA*(Z-k-1)/(Z-1) + πAB*k/(Z-1))
                ΠB.append(πBA*(Z-k)/(Z-1)  + πBB*(k-1)/(Z-1))
            # ρ is the probability that mutant B successfully invades A,
            # computed with the numerically stable method.
            ρ = fixation_rate_stable(ΠA, ΠB, β)
            # Each of the other strategies is equally likely to be the mutant.
            transition_prob = ρ / max(1, n_strategies-1)
            M[:, resident, invader] = transition_prob
            M[:, resident, resident] -= transition_prob
    return {**models, "transition_matrix": M}

In [None]:
#| export
#| hide
@method(build_transition_matrix, 'unstable')
def build_transition_matrix(models:dict # A dictionary that contains the parameters in `ModelTypeEGT`
                           ):
    """Build a transition matrix using a numerically unstable method.

    Same construction as the default implementation, except that the
    fixation rates are obtained from explicit `fermi_learning` adoption
    probabilities passed to `fixation_rate`.
    """
    Z = models['Z']
    S = models['strategy_set']
    β = models['β']
    π = models['payoffs']
    n_strategies = len(S)
    M = np.zeros((π.shape[0], n_strategies, n_strategies))
    for resident in range(n_strategies):
        for invader in range(n_strategies):
            if invader == resident:
                # The diagonal starts at 1; each off-diagonal entry of the
                # row is subtracted from it below so the row sums to 1.
                M[:, resident, resident] += 1
                continue
            πAA = π[:, resident, resident]
            πAB = π[:, resident, invader]
            πBA = π[:, invader, resident]
            πBB = π[:, invader, invader]
            # Average payoffs of resident (A) and invader (B) when k
            # individuals play B, for k = 1, ..., Z-1.
            ΠA = []
            ΠB = []
            for k in range(1, Z):
                ΠA.append(πAA*(Z-k-1)/(Z-1) + πAB*k/(Z-1))
                ΠB.append(πBA*(Z-k)/(Z-1)  + πBB*(k-1)/(Z-1))
            # Tneg: a B player adopts A; Tplus: an A player adopts B.
            Tneg = [fermi_learning(payoff_b, payoff_a, β)
                    for payoff_a, payoff_b in zip(ΠA, ΠB)]
            Tplus = [fermi_learning(payoff_a, payoff_b, β)
                     for payoff_a, payoff_b in zip(ΠA, ΠB)]
            ρ = fixation_rate(Tplus, Tneg)
            # Each of the other strategies is equally likely to be the mutant.
            transition_prob = ρ / max(1, n_strategies-1)
            M[:, resident, invader] = transition_prob
            M[:, resident, resident] -= transition_prob
    return {**models, "transition_matrix": M}

In [None]:
show_doc(build_transition_matrix.__dispatch_fn__)

---

[source](https://github.com/PaoloBova/gh-pages-example/blob/main/gh_pages_example/methods.py#L188){target="_blank" style="float:right; font-size:smaller"}

### build_transition_matrix

>      build_transition_matrix (models:dict)

Build a transition matrix between all monomorphic states using the
fermi social learning rule.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| models | dict | A dictionary that contains the parameters in `ModelTypeEGT` |

#### Examples and Tests

Consider the following two examples.

**Example 1**

Let all payoffs be equal in the game's payoff matrix. All expected payoffs will be equal too.

So, Fermi learning will say that each individual has a 50% chance of adopting the behaviour of the one they observe.

We therefore have an equal chance during each epoch of gaining or losing an individual of the given type, in this example we denote the type as $s \in \{A, B\}$, although this probability depends on population size $Z$ and the current number of individuals of that type, $k$, $T^+_s(k) = T^-_s(k) = \frac{Z-k}{Z} \frac{k}{Z} \frac{1}{2}$.

Recall that we calculate the fixation rate, $\rho$ as follows:
\begin{equation}
\rho = \frac{1}{1 + \sum_{j=1}^{N-1}{\prod_{k=1}^{j} \frac{b_k}{a_k}}}
\end{equation}
where $N=Z$, $b_k = T^-_s(k)$ and $a_k = T^+_s(k)$

In this example, for each strategy $s$, $T^-_s(k) = T^+_s(k), \, \forall k$, so $\rho = \frac{1}{Z}$.

We only have $2$ strategies, and $Z=10$, so the final transition matrix will look like

\begin{equation}
M \, = \, \begin{pmatrix}
1 - \frac{\rho}{2 - 1} & \frac{\rho}{2 - 1} &\\
\frac{\rho}{2 - 1} & 1 - \frac{\rho}{2 - 1}\\
\end{pmatrix}
= \begin{pmatrix}
0.9 & 0.1 &\\
0.1 & 0.9\\
\end{pmatrix}
\end{equation}

Note that the above example describes neutral drift, the idea that even if there is no advantage to be gained from any particular strategy, social learning can still result in the spread of that behaviour. Neutral drift also occurs if we set the Fermi learning rate $\beta = 0$, no matter what payoff matrix describes the game.

In [None]:
payoffs = np.array([[[2, 2],
                     [2, 2]]
                   ])
Z = 10
β = 1
models = {"payoffs": payoffs,
          "Z": Z,
          "β": β,
          "strategy_set": ["A", "B"],
         }
result = build_transition_matrix(models)

In [None]:
fastcore.test.test_close(result['transition_matrix'],
                         np.array([[0.9, 0.1],
                                   [0.1, 0.9]]))

**Example 2**

Let the payoff matrix be akin to a Prisoner's Dilemma with two strategies, $C$ or $D$ (Cooperate or Defect respectively):

\begin{pmatrix}
2 & 0\\
3 & 1\\
\end{pmatrix}

Again, for this simple example, the relative average success of strategy $C$ is independent of the number of $C$ players, $k$. This is rarely the case in practise but permits a more legible example.

$C$'s relative success over $D$ will be $\frac{2 (k-1)}{Z-1} - \frac{3 k + (Z - k - 1)}{Z-1} = - \frac{Z + 1}{Z-1}$.

Fermi learning means the probability of a $D$ player adopting what they see $C$ do is:

\begin{equation}
\frac{1}{1 + e^{- \beta (\Pi_C(k) - \Pi_D(k))}} = \frac{1}{1 + e^{\beta \frac{Z + 1}{Z-1}}}
\end{equation}

The fixation rate for mutant $C$ in a population of $D$ players, $\rho_{DC}$, can be computed as

\begin{equation}
\rho_{DC} = \frac{1}{1 + \sum_{j=1}^{Z-1}{(\frac{1 + e^{\beta \frac{Z + 1}{Z-1}}}{1 + e^{-\beta \frac{Z + 1}{Z-1}}})^j}}
\end{equation}

Similarly, the fixation rate for mutant $D$ in a population of $C$ players, $\rho_{CD}$, can be computed as 

\begin{equation}
\rho_{CD} = \frac{1}{1 + \sum_{j=1}^{Z-1}{(\frac{1 + e^{-\beta \frac{Z + 1}{Z-1}}}{1 + e^{\beta \frac{Z + 1}{Z-1}}})^j}}
\end{equation}

For $Z=10$ and $\beta = 1$, the above yields the following transition matrix,

\begin{equation}
M \, = \, \begin{pmatrix}
1 - \frac{\rho_{CD}}{2 - 1} & \frac{\rho_{CD}}{2 - 1} &\\
\frac{\rho_{DC}}{2 - 1} & 1 - \frac{\rho_{DC}}{2 - 1}\\
\end{pmatrix}
\approx \begin{pmatrix}
0.295 & 0.705 &\\
0.000 & 1.000\\
\end{pmatrix}
\end{equation}



Note how in the above fixation rate calculations we used the `fermi_learning` function to calculate the probability of a player with strategy $D$ adopting strategy $C$ (and likewise for the probability of a player with $C$ adopting $D$). This function has special properties which aid us in calculating the fixation rate.

Notice how the ratio of the two adoption rates takes the form, $\frac{1 + e^x}{1 + e^{-x}}$. It is not too hard to verify that $\frac{1 + e^x}{1 + e^{-x}} = e^x$.

We utilise this property to considerably improve the numerical stability of our algorithm for building a transition matrix. For this reason, we do not use `fermi_learning` in our algorithm at all.

We can similarly note that $\frac{1}{1 + e^{-x}} = 1 - \frac{1}{1 + e^{x}}$, i.e. the two adoption rates are complementary probabilities.


In [None]:
payoffs = np.array([[[2, 0],
                     [3, 1]],
                   ])
Z = 10
β = 1
models = {"payoffs": payoffs,
          "Z": Z,
          "β": β,
          "strategy_set": ["C", "D"],
         }
result = build_transition_matrix(models)

In [None]:
ρ_CD = 1 / (1 + sum((1 + np.exp(- β * (Z + 1) / (Z-1)))**j 
                    / (1 + np.exp(β * (Z + 1) / (Z-1)))**j
                    for j in range(1, Z)))
ρ_DC = 1 / (1 + sum((1 + np.exp(β * (Z + 1) / (Z-1)))**j
                    / (1 + np.exp(- β * (Z + 1) / (Z-1)))**j 
                    for j in range(1, Z)))

In [None]:
ρ_CD_alt = 1 / (1 + sum(np.exp(- j * β * (Z + 1) / (Z-1))
                        for j in range(1, Z)))
ρ_DC_alt = 1 / (1 + sum(np.exp(j * β * (Z + 1) / (Z-1))
                        for j in range(1, Z)))

In [None]:
fastcore.test.test_close(ρ_CD, ρ_CD_alt)
fastcore.test.test_close(ρ_DC, ρ_DC_alt)

In [None]:
fastcore.test.test_close(result['transition_matrix'],
                         np.array([[1- ρ_CD, ρ_CD],
                                   [ρ_DC, 1 - ρ_DC]]))

#### Example 3

Here is an additional example for the 3 by 3 matrix we discussed when testing other functions.

This time, we make sure we get the correct probabilities for each transition.

In [None]:
payoffs = np.array([[[51, 0.6, 51],
                     [114.3, 57.75, 39.38],
                     [51, 0.99798, 51]],
                   ])

In [None]:
expected = np.array([[[0.495, 0.5, 0.005],
                     [0, 1, 0],
                     [0.005, 0, 0.995]],
                   ])

In [None]:
Z = 100
β = 1
models = {"payoffs": payoffs,
          "Z": Z,
          "β": β,
          "strategy_set": ["AS", "AU", "PS"],
         }
result = build_transition_matrix(models)
fastcore.test.test_close(result['transition_matrix'], expected)

### Find ergodic strategy distribution

Step 2 is to find the ergodic distribution for the Evolutionary Game using the transition matrix we constructed in step 1.

Let $M$ denote the transition matrix, and $\omega_t$ be the column vector describing the proportions with which each strategy is played in the population.

We can describe the evolution of this system with $\omega_{t+1} = M^T \omega_t$, i.e. the proportion of players that use a given strategy in the next round will be equal to the sum of the proportions of players for each strategy who adopted that strategy in the current round. Equivalently, we can also consider $\omega_t$ as describing the probabilities that the system at time t is in each of the monomorphic states.

As each of the monomorphic states described in the transition matrix is reachable from any other with some probability and since the transition probabilities only depend on the current state, what we have is a markov chain which is irreducible.

The ergodicity theorem guarantees that such irreducible and aperiodic markov chains have an ergodic distribution that the system converges to, no matter where it starts. An ergodic distribution (also called a stationary distribution),  $\omega^*$ satisfies  $\omega^* = M^T \omega^*$ [[1]](https://gregorygundersen.com/blog/2019/10/28/ergodic-markov-chains/) [[2]](http://www.stat.columbia.edu/~liam/teaching/neurostat-spr11/papers/mcmc/Ergodicity_Theorem.pdf) [[3]](https://textbooks.math.gatech.edu/ila/1553/stochastic-matrices.html).

Our ergodic distribution, $\omega^*$, is therefore defined as the normalised right-hand eigenvector with eigenvalue 1 of the transposed transition matrix, $M^T$ (or equivalently, if we defined $\omega$ as a row vector instead, $\omega^*$ would be the left-hand eigenvector with eigenvalue 1 of transition matrix, $M$; numerical computing packages usually return the right-hand eigenvectors more directly, which is why I used the other formalism).

We use standard linear algebra methods from the [numpy](https://numpy.org/) package to find this eigenvector. These numerical methods will usually not return an eigenvector which is normalised to sum to 1, so we must normalise the eigenvector we are given. See their documentation to learn more about these numerical methods.

In [None]:
#| export
def find_ergodic_distribution(models:dict # A dictionary that contains the parameters in `ModelTypeEGT`
                             ):
    """Find the ergodic distribution of a markov chain with the
    given transition matrix.

    Reads `models["transition_matrix"]`, a stack of square transition
    matrices indexed as (model, row, column). Returns a copy of `models`
    with an added `'ergodic'` entry holding, for each model, the eigenvector
    of the transposed matrix whose eigenvalue is numerically 1, made
    non-negative and normalised to sum to 1.

    If a model has several unit eigenvalues, the first one returned by
    `np.linalg.eig` is used. A model with no eigenvalue close to 1
    contributes no row to `'ergodic'`.
    """
    M = models["transition_matrix"]
    # find unit eigenvector of markov chain
    # (right eigenvectors of M^T; `eig` is applied to every model at once)
    Λ, V = np.linalg.eig(M.transpose(0, 2, 1))
    is_unit = np.isclose(Λ, 1)
    # if multiple unit eigenvalues then choose the first
    chosen = np.zeros_like(is_unit, dtype=bool)
    idx = np.arange(len(is_unit)), is_unit.argmax(axis=1)
    chosen[idx] = is_unit[idx]
    # Eigenvectors are the columns of V, so transpose before selecting rows.
    unit_vectors = V.transpose(0, 2, 1)[chosen]
    # If any other eigenvalue is complex, `eig` returns a complex V even
    # though the unit eigenvector itself is real. Take the real part of the
    # selected vectors only, so the cast to float does not warn about
    # discarded imaginary parts.
    ergodic = np.array(np.real(unit_vectors), dtype=float)
    # ensure ergodic frequencies are positive and sum to 1
    ergodic = np.abs(ergodic) / np.sum(np.abs(ergodic), axis=1)[:, None]
    return {**models, 'ergodic': ergodic}

#### Examples and Tests

Let our transition matrix, $M$ be

\begin{equation}
M = \begin{pmatrix}
\frac{3}{4} & \frac{1}{4} \\
\frac{1}{4} & \frac{3}{4} \\
\end{pmatrix}
\end{equation}

Note that $M^T$ is a stochastic matrix because each column of the transposed matrix would sum to $1$ (in general the rows of the transposed matrix are unlikely to sum to 1, but choosing an example like the above makes it easy to compute the eigenvectors).

It's not too hard to verify that the characteristic polynomial of $M^T$ can be factored into $(\lambda - 1)(\lambda - \frac{1}{2})$, so we have two eigenvalues, $1$ and $\frac{1}{2}$.

It's not too hard to verify that the column vector $[1, 1]$ is the eigenvector of $M^T$ with eigenvalue $1$.

Now that we know the weights placed on each strategy, we can compute the strategy distribution by normalising our eigenvector.

The ergodic distribution is $\omega^* = [\frac{1}{2}, \frac{1}{2}]$.

In [None]:
# A single model (leading axis of length 1) with a symmetric 2x2 transition matrix.
M = np.array([[[3/4, 1/4],
               [1/4, 3/4]],
             ])
models = {"transition_matrix": M}
result = find_ergodic_distribution(models)

In [None]:
# The eigenvector is computed numerically, so compare within a tolerance
# rather than demanding bit-exact floating point equality.
fastcore.test.test_close(result['ergodic'],
                         np.array([[1/2, 1/2]]))

In [None]:
# #| hide
# # Here is some code which illustrates how one could use sympy to find the relevant eigenvectors 
# # using symbolic methods (but please note that even sympy must resort to numerical methods if
# # the matrices are bigger than 5 by 5 in size, due to the fundamental lack of exact solutions to 
# # polynomial equations with order greater than 5)
# import sympy
# for m in M:
#     # Sympy needs integers or expressions to work
#     # Integers is usually safer
#     m = np.array(1000 * m, dtype=int)
#     M_symbolic = sympy.Matrix(m)
#     for result in M_symbolic.eigenvects():
#         lamda, multiplicity, evs = result
        
#         # print("lambda: " , lamda,
#         #           "multiplicity: ", multiplicity,
#         #           "eigenvectors: ", evs)

Here is another quick illustrative example.

Let our transition matrix, $M$ be

\begin{equation}
M = \begin{pmatrix}
\frac{3}{4} & \frac{1}{4} \\
\frac{3}{4} & \frac{1}{4} \\
\end{pmatrix}
\end{equation}

$M^T$ is a stochastic matrix. It is easy to verify that $[\frac{3}{4}, \frac{1}{4}]$ is the normalised eigenvector with eigenvalue 1.


In [None]:
# A single model whose two rows are identical: the next state is drawn from
# the same probabilities whichever state the chain is currently in.
M = np.array([[[3/4, 1/4],
               [3/4, 1/4]],
             ])
models = {"transition_matrix": M}
result = find_ergodic_distribution(models)

In [None]:
# The eigenvector is computed numerically, so compare within a tolerance
# rather than demanding bit-exact floating point equality.
fastcore.test.test_close(result['ergodic'],
                         np.array([[3/4, 1/4]]))

In [None]:
show_doc(find_ergodic_distribution)

---

[source](https://github.com/PaoloBova/gh-pages-example/blob/main/gh_pages_example/methods.py#L159){target="_blank" style="float:right; font-size:smaller"}

### find_ergodic_distribution

>      find_ergodic_distribution (models:dict)

Find the ergodic distribution of a markov chain with the
given transition matrix.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| models | dict | A dictionary that contains the parameters in `ModelTypeEGT` |

### Run full markov chain algorithm

Finally, here is a helper function to both build the transition matrix for the model and find its ergodic distribution.

In [None]:
#| export
def markov_chain(models:dict # A dictionary that contains the parameters in `ModelTypeEGT`
                ):
    """Find the ergodic distribution of the evolutionary
    game given by each model in models.

    Convenience wrapper that passes `models` to `build_transition_matrix`
    and then to `find_ergodic_distribution`, returning the final result.
    """
    # NOTE(review): `thread_macro` is defined elsewhere; presumably it feeds the
    # output of each function into the next one, left to right. Confirm in utils.
    return thread_macro(models,
                        build_transition_matrix,
                        find_ergodic_distribution)

In [None]:
show_doc(markov_chain)

---

[source](https://github.com/PaoloBova/gh-pages-example/blob/main/gh_pages_example/methods.py#L178){target="_blank" style="float:right; font-size:smaller"}

### markov_chain

>      markov_chain (models:dict)

Find the ergodic distribution of the evolutionary
game given by each model in models.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| models | dict | A dictionary that contains the parameters in `ModelTypeEGT` |

## Multiple Populations

In [None]:
#|export
#| hide
@method(build_transition_matrix, 'multiple-populations')
def build_transition_matrix(models:dict # A dictionary that contains the parameters in `ModelTypeEGT`
                           ):
    """Build a transition matrix between all monomorphic states
    when there are multiple populations.

    Reads from `models`:

    - `'Z'`: population size.
    - `'β'`: passed through to `fixation_rate_stable`.
    - `'recurrent_state_space'`: its length sets the size of each matrix.
    - `'payoffs'`: array indexed as `[model, strategy_profile, player]`.
    - `'valid_transitions'`: dict whose values each give `'row_ind'`,
      `'col_ind'`, `'player_index'` and `'strategy_profile_indices'`
      (payoff rows for the `'AA'`, `'AB'`, `'BA'` and `'BB'` profiles).

    Returns a copy of `models` with `'transition_matrix'` added: one square
    matrix per model, starting from the identity, with each valid transition
    moving probability mass from the diagonal to `[row_ind, col_ind]`.
    """
    Z, S, β = [models[k] for k in ['Z', 'recurrent_state_space', 'β']]
    valid_transitions = models['valid_transitions']
    π = models['payoffs']
    # One matrix per model. Size this from the local payoffs array rather
    # than from a module-level `payoffs` name that may not exist.
    M = np.zeros((π.shape[0], len(S), len(S)))
    for row_ind in range(M.shape[-1]):
        M[:, row_ind, row_ind] += 1
    # Count once how many valid transitions leave each state; this is the
    # number of possible mutations that the fixation rate is shared between.
    n_mutations_from = {}
    for vt in valid_transitions.values():
        n_mutations_from[vt['row_ind']] = n_mutations_from.get(vt['row_ind'], 0) + 1
    for transition in valid_transitions.values():
        strategy_profile_indices = transition['strategy_profile_indices']
        player_index = transition['player_index']
        row_ind = transition['row_ind']
        col_ind = transition['col_ind']
        πAA = π[:, strategy_profile_indices['AA'], player_index]
        πAB = π[:, strategy_profile_indices['AB'], player_index]
        πBA = π[:, strategy_profile_indices['BA'], player_index]
        πBB = π[:, strategy_profile_indices['BB'], player_index]
        # Average payoffs of an A-player and a B-player when k of the Z
        # individuals play B, for k = 1, ..., Z-1.
        ΠA = [πAA*(Z-k-1)/(Z-1) + πAB*k/(Z-1)
              for k in range(1, Z)]
        ΠB = [πBA*(Z-k)/(Z-1)  + πBB*(k-1)/(Z-1)
              for k in range(1, Z)]
        # We use a numerically stable method to find the fixation rate, ρ.
        # ρ is the probability that mutant B successfully invades A
        ρ = fixation_rate_stable(ΠA, ΠB, β)
        # We have to divide this rate by the number of possible mutations
        n_mutations = n_mutations_from[row_ind]
        M[:, row_ind, col_ind] = ρ / n_mutations
        M[:, row_ind, row_ind] -= ρ / n_mutations
    return {**models, 'transition_matrix':M}

In [None]:
show_doc(build_transition_matrix.__multi__['multiple-populations'])

---

[source](https://github.com/PaoloBova/gh-pages-example/blob/main/gh_pages_example/methods.py#L188){target="_blank" style="float:right; font-size:smaller"}

### build_transition_matrix

>      build_transition_matrix (models:dict)

Build a transition matrix between all monomorphic states
when there are multiple populations.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| models | dict | A dictionary that contains the parameters in `ModelTypeEGT` |

Here is an example of how to build a transition matrix when we have 2 populations.

In the limit of small mutation rates, the system spends almost all its time in states where each population plays one strategy. Moreover, only a mutant for one population has the opportunity to fixate in that population. This means we only need to consider transitions where the strategy played by one population has changed. Transitions where both populations would have to change strategy occur with probability 0.

As we are working with multiple populations, our `models` variable needs to declare this with the `dispatch-type` key.

This time, the `payoffs` key must have payoffs for each population. We also need a set of `strategy_contests`, not only a `strategy_set`. `strategy_contests` tells us which strategy profiles are being compared for the relevant transition, which population is affected, and where to find the relevant payoffs for the comparison.

`payoffs` is a 3D Array containing payoffs for each model, each strategy profile, and each player.

**Note:** the above representation can also capture games where multiple players from the same population interact with players from another population (e.g. 2 companies and 1 regulator play an R&D game). This is achieved by noticing that players from the same population receive the same payoffs in a game as their counterparts do if they were to switch strategies (if we had a model with subpopulations, a similar logic applies but this time to each subpopulation rather than the population).

**TODO:** the above representation needs to capture games where it is uncertain which populations players will be sampled from. For example, in some climate change negotiation games, there are rich and poor subpopulations but due to random selection, games may feature all rich, all poor, or some mix of players. An alternative representation which may scale better is to give different symbols to each strategy depending on the subpopulation. In this way, a strategy implicitly informs us from which population the player is from. However, with such a representation, it is essential for `valid_transitions` to describe which strategies must be compared.

**Warning:** In games with many players (e.g. > 100), the space of possible strategy profiles can be too large to justify storing payoffs in an array. Instead, payoffs will be a function that is called when needed.

`valid_transitions` is a dictionary mapping each transition to information about the relevant players in the contest and the indices of the relevant payoffs for computing the likelihood of the transition.

In [None]:
Z = 10
β = 1
# payoffs[model, strategy_profile, player]: 1 model, 8 strategy profiles, 3 players.
payoffs = np.array([[[2, 0, 2],
                     [3, 1, 3],
                     [3, 1, 3],
                     [4, 2, 4],
                     [2, 0, 2],
                     [3, 1, 3],
                     [3, 1, 3],
                     [4, 2, 4]],
                   ])
# The position of a state in this list is its row/column in the transition matrix.
recurrent_state_space = ["AX", "AY", "BX", "BY"]
valid_transitions = {"AX->AY": {"row_ind": 0,
                                "col_ind": 1,
                                "player_index": 0,  # We need to know which player's payoff matters
                                # We need to know where to look for the payoffs
                                # of the 4 strategy profiles relevant to deriving
                                # the fixation rate for the given transition.
                                "strategy_profile_indices": {"AA": 0, "AB": 1,
                                                             "BA": 2, "BB": 3}},
                     "AY->AX": {"row_ind": 1,
                                "col_ind": 0,
                                "player_index": 0,
                                "strategy_profile_indices": {"AA": 3, "AB": 2,
                                                             "BA": 1, "BB": 0}},
                     "BX->BY": {"row_ind": 2,
                                "col_ind": 3,
                                "player_index": 0,
                                "strategy_profile_indices": {"AA": 4, "AB": 5,
                                                             "BA": 6, "BB": 7}},
                     "BY->BX": {"row_ind": 3,
                                "col_ind": 2,
                                "player_index": 0,
                                "strategy_profile_indices": {"AA": 7, "AB": 6,
                                                             "BA": 5, "BB": 4}},
                     
                     # Regulators do not face each other, so their payoffs do
                     # not depend on the number of mutants they see.
                     # There are only two strategy profiles to consider, so for
                     # compatibility with `build_transition_matrix`, we repeat
                     # indices where necessary.
                     
                     "AX->BX": {"row_ind": 0,
                                "col_ind": 2,
                                "player_index": 2,
                                "strategy_profile_indices": {"AA": 0, "AB": 0,
                                                             "BA": 4, "BB": 4,
                                                            }},
                     "AY->BY": {"row_ind": 1,
                                "col_ind": 3,
                                "player_index": 2,
                                "strategy_profile_indices": {"AA": 3, "AB": 3,
                                                             "BA": 7, "BB": 7,
                                                            }},
                     "BX->AX": {"row_ind": 2,
                                "col_ind": 0,
                                "player_index": 2,
                                "strategy_profile_indices": {"AA": 4, "AB": 4,
                                                             "BA": 0, "BB": 0,
                                                            }},
                     "BY->AY": {"row_ind": 3,
                                "col_ind": 1,
                                "player_index": 2,
                                "strategy_profile_indices": {"AA": 7, "AB": 7,
                                                             "BA": 3, "BB": 3,
                                                            }},
                    }

# Open questions for later:
# What if Regulators could transition as a mutant company fixates (independent of the company's chance of fixating)
# How do two populations emerge in the first place from one population? 
# Market games. Number of players of each strategy determines the market price which informs a strategy's average payoff
# instead of averaging over the number of players of each strategy.

# "dispatch-type" selects the 'multiple-populations' method of `build_transition_matrix`.
models = {"dispatch-type": "multiple-populations",
          "β": β,
          "Z": Z,
          "recurrent_state_space": recurrent_state_space,
          "valid_transitions": valid_transitions,
          "payoffs": payoffs,
         }

In [None]:
build_transition_matrix(models)

{'dispatch-type': 'multiple-populations',
 'β': 1,
 'Z': 10,
 'recurrent_state_space': ['AX', 'AY', 'BX', 'BY'],
 'valid_transitions': {'AX->AY': {'row_ind': 0,
   'col_ind': 1,
   'player_index': 0,
   'strategy_profile_indices': {'AA': 0, 'AB': 1, 'BA': 2, 'BB': 3}},
  'AY->AX': {'row_ind': 1,
   'col_ind': 0,
   'player_index': 0,
   'strategy_profile_indices': {'AA': 3, 'AB': 2, 'BA': 1, 'BB': 0}},
  'BX->BY': {'row_ind': 2,
   'col_ind': 3,
   'player_index': 0,
   'strategy_profile_indices': {'AA': 4, 'AB': 5, 'BA': 6, 'BB': 7}},
  'BY->BX': {'row_ind': 3,
   'col_ind': 2,
   'player_index': 0,
   'strategy_profile_indices': {'AA': 7, 'AB': 6, 'BA': 5, 'BB': 4}},
  'AX->BX': {'row_ind': 0,
   'col_ind': 2,
   'player_index': 2,
   'strategy_profile_indices': {'AA': 0, 'AB': 0, 'BA': 4, 'BB': 4}},
  'AY->BY': {'row_ind': 1,
   'col_ind': 3,
   'player_index': 2,
   'strategy_profile_indices': {'AA': 3, 'AB': 3, 'BA': 7, 'BB': 7}},
  'BX->AX': {'row_ind': 2,
   'col_ind': 0,
   'pl

8 recurrent states since we have 3 sectors who each can choose one of 2 strategies.
This means we have 8 * C(3, 1) = 24 possible transitions (from each state, exactly one of the 3 sectors switches to its other strategy).

More generally, the number of valid transitions = n_recurrent_states * SUM_j(n_strategies_j - 1)
where n_recurrent_states = PROD_j(n_strategies_j) for each population j.

For 4 populations and 3 strategies each, this is 648 valid transitions.
Specifying these by hand is clearly untenable! Granted, an 81 by 81 transition matrix may be
a little bit unwieldy, but its eigenvectors are certainly computable.

On the other hand, it is certainly plausible to calculate the fixation rates for
the comparison of each state. If we do this programmatically, perhaps there is
some filter we can program which marks some of the transitions as 0.
Consistency is key. If we index each state by i, we must be able to index where
each transition is in the transition matrix. If we order the states by the numbers
of each type, we might have a way to create our filter too.

One complication is that once we know the two states to compare, we must allow
a general function for computing the average payoffs (and have a way of resolving
how that function uses the payoff matrix and given states to compute the average payoffs)

We should use a binary index when each sector only has two strategies. We can
generalise this to another base depending on the number of strategies.
We can then convert this state index to base 10 for placement in our
transition matrix. A transition is easily represented by a pair of these numbers in
whichever base we prefer.

As a filter we can notice that if we take the difference of any two state indices
we should find that only one of the numbers is non-zero. This would be our filter
for a model with multiple sectors.

What remains is for us to have useful information at hand for computing the
payoff matrices. Create a profile index and filter. Allow player order to matter if desired.
To allow such general games with sampling between the populations and variable player order
we have to specify additional profile filter rules to go along with our game type.

Our average payoff function can then calculate the profile in the way relevant to the filter rule.
An excellent way of representing the payoffs would be to have a nested map.
Level one indexes the players of the game. Level two indexes the profile using
our profile index. Level three indexes the models. Level two is the most important,
since this is where we can apply our profile filter to retain only the payoffs relevant
to computing the average payoff (we could switch level one and two if that is easier).

There is a reason why I haven't suggested using a 3D array in this case. 
Very simple payoff models where all or the majority of sectors are fixed players
have many invalid player orderings and compositions. We wouldn't want to force
the user to create a larger array than they need.

I also suspect that the nested map is far more legible. Note that our algorithm
only uses 1D arrays from our payoff matrices anyways.

For further legibility, it makes sense to generate a map from the state index
to their human readable labels (e.g. 111 -> CCC).

Consider DataArray, xarray, and Pandas instead of numpy structured arrays or dicts: https://numpy.org/doc/stable/user/basics.rec.html#structured-datatypes

#### Examples and tests

**Example 1**

This example comes from a paper by Encarnacao et al. 2016.

They have a 3 sector model and report fixation probabilities for a particular scenario. Can we replicate it?

**Step 1:** define the payoffs of the game for each strategy profile. While the payoffs are fairly simple to represent as a 2D array for each model, we use a data structure which has been specifically chosen for compatibility with more nuanced models (below this is a nested python dict).

TODO: at least use python default_dicts instead of core dicts

In [None]:
def payoffs_encanacao_2016(models):
    """Attach the payoff table of the three-sector game to `models`.

    Reads the parameters `b_r`, `b_s`, `c_s`, `c_t` and `σ` from `models`
    (`b_r.shape[0]` sets the length of the all-zero payoff array) and
    returns a copy of `models` with a `"payoffs"` entry added.

    `"payoffs"` is a nested dict: profile label -> player (`"P3"`, `"P2"`,
    `"P1"`) -> payoff. Each of the 8 profiles is reachable under two keys
    that share the same inner dict: a readable label such as `"C-D-C"` and
    an index label such as `"5-2-1"`.
    """
    b_r, b_s, c_s, c_t, σ = (models[name]
                             for name in ('b_r', 'b_s', 'c_s', 'c_t', 'σ'))
    zero = np.zeros(b_r.shape[0])

    # As in the main text
    payoffs_by_name = {
        "C-C-C": {"P3": b_r-2*c_s, "P2": σ+b_s-c_t, "P1": σ+b_s},
        "C-C-D": {"P3": -c_s,      "P2": b_s-c_t,   "P1": zero},
        "C-D-C": {"P3": b_r-c_s,   "P2": zero,      "P1": b_s},
        "C-D-D": {"P3": zero,      "P2": σ,         "P1": σ},
        "D-C-C": {"P3": zero,      "P2": σ-c_t,     "P1": σ},
        "D-C-D": {"P3": zero,      "P2": -c_t,      "P1": zero},
        "D-D-C": {"P3": zero,      "P2": zero,      "P1": zero},
        "D-D-D": {"P3": zero,      "P2": σ,         "P1": σ},
    }

    # Index labels use one code per player, joined by "-". The game has 3
    # players, 3 sectors and 2 strategies per sector, so 6 codes in total:
    # codes 0..5 name the strategies D, C, D, C, D, C (two per sector, in
    # sector order). "0-0-0" would mean every player is from the first sector
    # playing its first strategy; "5-5-5" every player from the third sector
    # playing its second strategy. The separator keeps labels unambiguous
    # once there are 10 or more strategies in total.
    #
    # The indexes below capture all strategy profiles where each player is
    # fixed to a unique sector (player order does not matter, so only one
    # ordering of sectors is needed).
    name_of_index = {
        "4-2-0": "D-D-D",
        "4-2-1": "D-D-C",
        "4-3-0": "D-C-D",
        "4-3-1": "D-C-C",
        "5-2-0": "C-D-D",
        "5-2-1": "C-D-C",
        "5-3-0": "C-C-D",
        "5-3-1": "C-C-C",
    }
    payoffs = dict(payoffs_by_name)
    for index_label, name in name_of_index.items():
        payoffs[index_label] = payoffs_by_name[name]
    return {**models, "payoffs": payoffs}

Next, we need to provide a filter rule so that we know which payoffs are relevant to our average payoff (or success) computations.

The filter takes the number of sectors, the number of players, and a rule, which in this case is the set of allowed sectors per player.

We can specify this as a dict from player to a vector or set of sectors. \
We can use the number of sectors to mention valid strategies for each player. \
Our sectors are indexed from 0 to n_sectors - 1 (from right to left)\
Our players are indexed from 0 to n_players - 1 (from right to left)

We should have a method for creating all possible player profiles. Once we apply
our filters, we will know exactly which profiles we need to write payoffs for.
We could also use this method to validate that we have passed sufficient information
to payoffs, and warn us about what is missing.

In [None]:
#| export
def create_all_profiles(models):
    """Create all strategy profiles for the set of models.

    Reads `models['n_players']` and `models['n_strategies']` (a single count
    or a list of counts per sector; they are summed). Each profile is a
    string of one strategy code per player joined by "-", e.g. "0-1".
    Returns a copy of `models` with the list added under `"profiles"`.
    """
    n_players = models['n_players']
    n_strategies_total = np.sum(models['n_strategies'])
    # Every player chooses from the same axis of strategy codes.
    strategy_axis = np.arange(n_strategies_total)[:, None]
    grid = build_grid_from_axes([strategy_axis] * n_players)
    profiles = ["-".join(str(code) for code in row) for row in grid]
    # Sanity check: one profile for every combination of codes across players.
    fastcore.test.test_eq(len(profiles), n_strategies_total ** n_players)
    return {**models, "profiles": profiles}

**Tests for `create_all_profiles`**

In [None]:
#| export
# A scalar `n_strategies` is used as the total: 2 players x 2 strategies -> 4 profiles.
result = create_all_profiles({"n_players": 2, "n_strategies": 2})
fastcore.test.test_eq(result['profiles'], ["0-0", "0-1", "1-0", "1-1"])

# A list of per-sector counts is summed: 2 + 2 = 4 strategy codes -> 16 profiles.
result = create_all_profiles({"n_players": 2, "n_strategies": [2, 2]})
fastcore.test.test_eq(result['profiles'],
                       ["0-0", "0-1", "0-2", "0-3",
                        "1-0", "1-1", "1-2", "1-3",
                        "2-0", "2-1", "2-2", "2-3",
                        "3-0", "3-1", "3-2", "3-3",])

# Only the total number of strategies matters, not how it is split by sector.
fastcore.test.test_eq(create_all_profiles({"n_players": 2,
                                           "n_strategies": [2, 2]})['profiles'],
                     create_all_profiles({"n_players": 2,
                                          "n_strategies": 4})['profiles'])

In [None]:
#| export

@multi
def profile_filter(models):
    """Filter strategy profiles to those which satisfy the given rule.

    This is the dispatch function: it returns `models['profile_filter_rule']`,
    or `None` when that key is absent.
    """
    rule = models.get('profile_filter_rule')
    return rule

@method(profile_filter, 'allowed_sectors')
def profile_filter(models):
    """Filter strategy profiles to only those where players are from their
    allowed sectors.

    Reads from `models`:

    - `'allowed_sectors'`: maps a player key ("P1", "P2", ...) to the
      sectors that player may belong to.
    - `'sector_strategies'`: maps a sector key to its strategy codes.
    - the profiles to filter: `'profiles_filtered'` if present, otherwise
      `'profiles'`, otherwise every profile from `create_all_profiles`.

    Players are read from right to left in each profile string, so the last
    code belongs to "P1". Returns a copy of `models` with the surviving
    profiles stored under `"profiles_filtered"`.
    """
    # `dict.get` evaluates its default eagerly, so look the keys up
    # explicitly and only enumerate all profiles when none were supplied.
    if 'profiles_filtered' in models:
        profiles = models['profiles_filtered']
    elif 'profiles' in models:
        profiles = models['profiles']
    else:
        profiles = create_all_profiles(models)['profiles']
    allowed_sectors = models['allowed_sectors']
    sector_strategies = models['sector_strategies']
    # The strategy codes a player may use do not depend on the profile, so
    # build them once per player position and reuse them.
    allowed_by_position = {}
    profiles_filtered = []
    for k in profiles:
        k_tuple = list(map(int, k.split("-")))
        valid = True
        for i, ind in enumerate(k_tuple[::-1]):
            if i not in allowed_by_position:
                allowed_by_position[i] = np.hstack(
                    [sector_strategies[j] for j in allowed_sectors[f"P{i+1}"]])
            if ind not in allowed_by_position[i]:
                valid = False
                break
        if valid:
            profiles_filtered.append(k)
    return {**models, "profiles_filtered": profiles_filtered}

@method(profile_filter)
def profile_filter(models):
    """Default filter method: print a hint about the missing rule and
    return `models` unchanged."""
    message = """`profile_filter` called but `models` did not specify a
           `profile_filter_rule`. Try specifying one."""
    print(message)
    return models

We also need a filter which yields the relevant profiles for each transition considered.

Given two states and their transition we first check that it is valid, and if so we know
the sector affected. Only that sector may choose different strategies, so the others are fixed.

Note that the recurrent states of the game describe each strategy employed by each sector. This is written in the same form as we write the strategy profile, "{strategy_code}-{strategy_code}-{strategy_code}" but this time each strategy code refers to a strategy for each sector (right to left), and they are indexed from 0 again to mark a clear difference from the codes for the strategy profile. This also makes it straightforward to build up the set of allowed strategies.

**Note:** Below, my filter only keeps strategies which are relevant to the two recurrent states relevant to the transition. If we need additional strategies, this filter will not be sufficient (this might be the case if we allow a social learning rule which explores more than one mutant strategy at a time)

In [None]:
#| export
@method(profile_filter, 'relevant_to_transition')
def profile_filter(models):
    """Filter for strategy profiles relevant to the given transition.

    Reads from `models`:

    - `'transition_indices'`: a pair of recurrent states, each written as
      one per-sector strategy position per sector joined by "-" and read
      from right to left (the last entry belongs to sector "S1").
    - `'sector_strategies'`: maps a sector key to its strategy codes.
    - the profiles to filter: `'profiles_filtered'` if present, otherwise
      `'profiles'`, otherwise every profile from `create_all_profiles`.

    A transition is valid when the two states differ in exactly one sector.
    For a valid transition, a profile is kept only if every code in it is a
    strategy used in one of the two states; the result is stored under
    `"profiles_filtered"` in a copy of `models`. For any other transition
    `models` is returned unchanged.
    """
    ind1, ind2 = models['transition_indices']
    sector_strategies = models['sector_strategies']
    ind1_tuple = list(map(int, ind1.split("-")))
    ind2_tuple = list(map(int, ind2.split("-")))
    differ = [i1!=i2 for i1, i2 in zip(ind1_tuple, ind2_tuple)]
    if sum(differ) != 1:
        return models
    # `dict.get` evaluates its default eagerly, so look the keys up
    # explicitly and only enumerate all profiles when none were supplied.
    if 'profiles_filtered' in models:
        profiles = models['profiles_filtered']
    elif 'profiles' in models:
        profiles = models['profiles']
    else:
        profiles = create_all_profiles(models)['profiles']
    # Translate each state's per-sector positions into strategy codes.
    strategies1 = [sector_strategies[f"S{i+1}"][ind]
                   for i, ind in enumerate(ind1_tuple[::-1])]
    strategies2 = [sector_strategies[f"S{i+1}"][ind]
                   for i, ind in enumerate(ind2_tuple[::-1])]
    strategies_valid = np.unique(np.hstack([strategies1, strategies2]))
    profiles_filtered = [
        profile for profile in profiles
        if all(int(code) in strategies_valid for code in profile.split("-"))
    ]
    return {**models, "profiles_filtered": profiles_filtered}

**Tests for `profile_filter`**

Let's test the `"allowed_sectors"` filter rule.

First I test a game with 3 players but only two sectors.\
Each player is fixed to a specific sector, but two players belong to the same sector.\
In this case the rule should filter to only those profiles where players use\
the strategies available to the sectors they can play as.

In [None]:
#| export
allowed_sectors = {"P3": ["S2"],
                   "P2": ["S2"],
                   "P1": ["S1"]}
sector_strategies = {"S2": [2, 3],
                     "S1": [0, 1]}
n_players = 3
n_strategies = [2, 2] # this could be derived from sector_strategies or the other way round.
models = {"profile_filter_rule": "allowed_sectors",
          "n_players": n_players,
          "n_strategies": n_strategies,
          "allowed_sectors": allowed_sectors,
          "sector_strategies": sector_strategies}
result = profile_filter(models)['profiles_filtered']
# Players are read right to left: P3 and P2 may only use sector S2's
# strategies (2, 3), while P1 may only use sector S1's strategies (0, 1).
expected = ["2-2-0", "2-2-1", 
            "2-3-0", "2-3-1",
            "3-2-0", "3-2-1",
            "3-3-0", "3-3-1",]
fastcore.test.test_eq(result, expected)
fastcore.test.test_eq(len(result), 8)

I then test a 3 player game with 3 sectors. Each player is fixed to a particular sector.

In [None]:
#| export
# One player per sector, so each position in a profile may only hold one of
# that sector's two strategy codes.
allowed_sectors = {"P3": ["S3"],
                   "P2": ["S2"],
                   "P1": ["S1"]}
sector_strategies = {"S3": [4, 5],
                     "S2": [2, 3],
                     "S1": [0, 1]}
models = {"profile_filter_rule": "allowed_sectors",
          "n_players": 3,
          "n_strategies": [2, 2, 2],
          "allowed_sectors": allowed_sectors,
          "sector_strategies": sector_strategies}
result = profile_filter(models)['profiles_filtered']
expected = ["4-2-0",
            "4-2-1",
            "4-3-0",
            "4-3-1",
            "5-2-0",
            "5-2-1",
            "5-3-0",
            "5-3-1",
           ]
fastcore.test.test_eq(result, expected)

Now, let's test the `"relevant_to_transition"` filter rule.

First I test a game with 3 players but only two populations.\
In this case the rule should filter to only the relevant profiles where strategy 2 is missing.\
Recall that strategy 2 is the first strategy available to a player from sector 2.

In [None]:
#| export
sector_strategies = {"S2": [2, 3],
                     "S1": [0, 1]}
# States are read right to left: in "1-0" sector S1 uses its strategy at
# position 0 (code 0) and S2 its strategy at position 1 (code 3); in "1-1"
# S1 switches to position 1 (code 1).
transition_indices = ["1-0", "1-1"]
n_players = 3
n_strategies = [2, 2] # this could be derived from sector_strategies or the other way round.
models = {"profile_filter_rule": "relevant_to_transition",
          "n_players": n_players,
          "n_strategies": n_strategies,
          "sector_strategies": sector_strategies,
          "transition_indices": transition_indices}
result = profile_filter(models)['profiles_filtered']
# Only strategy 2 is irrelevant
expected = ["0-0-0", "0-0-1", "0-0-3", 
            "0-1-0", "0-1-1", "0-1-3",
            "0-3-0", "0-3-1", "0-3-3",
            "1-0-0", "1-0-1", "1-0-3",
            "1-1-0", "1-1-1", "1-1-3",
            "1-3-0", "1-3-1", "1-3-3",
            "3-0-0", "3-0-1", "3-0-3",
            "3-1-0", "3-1-1", "3-1-3",
            "3-3-0", "3-3-1", "3-3-3"]
fastcore.test.test_eq(result, expected)
expected = (np.sum(n_strategies) - 1) ** n_players  # 1 of the 4 strategies won't be relevant here
fastcore.test.test_eq(len(result), expected)

I then test a larger game with 3 players and 3 sectors. The list is long, so
I only check that the number of profiles kept is what we expect.

In [None]:
#| export
sector_strategies = {"S3": [4, 5],
                     "S2": [2, 3],
                     "S1": [0, 1]}
# Only sector S1 (the rightmost entry) changes, so the relevant strategy
# codes are 0 and 1 for S1, 3 for S2 and 5 for S3.
transition_indices = ["1-1-1", "1-1-0"]
n_players = 3
n_strategies = [2, 2, 2]
models = {"profile_filter_rule": "relevant_to_transition",
          "n_players": n_players,
          "n_strategies": n_strategies,
          "sector_strategies": sector_strategies,
          "transition_indices": transition_indices}
result = profile_filter(models)['profiles_filtered']
expected = (np.sum(n_strategies) - 2) ** n_players  # 2 of the 6 strategies won't be relevant here
fastcore.test.test_eq(len(result), expected)

I now test the `"allowed_sectors"` and "`relevant_to_transition`" rules when used together.\
The game is as before with 3 players and 3 sectors. Here, we can check that the list of profiles is as expected.

In [None]:
#| export
# Every player may be drawn from exactly one sector.
allowed_sectors = {"P3": ["S3"],
                   "P2": ["S2"],
                   "P1": ["S1"]}
sector_strategies = {"S3": [4, 5],
                     "S2": [2, 3],
                     "S1": [0, 1]}
transition_indices = ["1-1-1", "1-1-0"]
n_players = 3
n_strategies = [2, 2, 2]
models = {"profile_filter_rule": "relevant_to_transition",
          "n_players": n_players,
          "n_strategies": n_strategies,
          "allowed_sectors": allowed_sectors,
          "sector_strategies": sector_strategies,
          "transition_indices": transition_indices}
# Filter with the "relevant_to_transition" rule set in `models`, then switch the
# rule to "allowed_sectors" and filter again.
# NOTE(review): presumably `thread_macro` feeds `models` through the listed
# steps from top to bottom -- confirm against its definition in utils.
result = thread_macro(models,
                      profile_filter,
                      (assoc, "profile_filter_rule", "allowed_sectors"),
                      profile_filter)
# The two expected profiles differ only in the last entry.
expected = ["5-3-0", "5-3-1"]
fastcore.test.test_eq(len(result['profiles_filtered']), 2)
fastcore.test.test_eq(result['profiles_filtered'], expected)

The order we apply these two filter rules should not matter.

In [None]:
#| export
# Same game as in the previous test: one sector per player, three sectors.
allowed_sectors = {"P3": ["S3"],
                   "P2": ["S2"],
                   "P1": ["S1"]}
sector_strategies = {"S3": [4, 5],
                     "S2": [2, 3],
                     "S1": [0, 1]}
transition_indices = ["1-1-1", "1-1-0"]
n_players = 3
n_strategies = [2, 2, 2]
models = {"profile_filter_rule": "relevant_to_transition",
          "n_players": n_players,
          "n_strategies": n_strategies,
          "allowed_sectors": allowed_sectors,
          "sector_strategies": sector_strategies,
          "transition_indices": transition_indices}
# result1: "relevant_to_transition" first, then "allowed_sectors".
result1 = thread_macro(models,
                       profile_filter,
                       (assoc, "profile_filter_rule", "allowed_sectors"),
                       profile_filter)
# result2: the same two rules in the opposite order.
result2 = thread_macro(models,
                       (assoc, "profile_filter_rule", "allowed_sectors"),
                       profile_filter,
                       (assoc, "profile_filter_rule", "relevant_to_transition"),
                       profile_filter)
expected = ["5-3-0", "5-3-1"]
# Both orders must give the expected profiles, and therefore the same result.
fastcore.test.test_eq(result1['profiles_filtered'], expected)
fastcore.test.test_eq(result2['profiles_filtered'], expected)
fastcore.test.test_eq(result1['profiles_filtered'],
                      result2['profiles_filtered'])

Let's also check the earlier game with 2 sectors and 3 players.

In [None]:
#| export
# Two of the three players (P3 and P2) are drawn from the same sector, S2.
allowed_sectors = {"P3": ["S2"],
                   "P2": ["S2"],
                   "P1": ["S1"]}
sector_strategies = {"S2": [2, 3],
                     "S1": [0, 1]}
# The transition changes only the last entry of the state index (0 -> 1).
transition_indices = ["1-0", "1-1"]
n_players = 3
n_strategies = [2, 2] # this could be derived from sector_strategies or the other way round.
models = {"profile_filter_rule": "relevant_to_transition",
          "n_players": n_players,
          "n_strategies": n_strategies,
          "allowed_sectors": allowed_sectors,
          "sector_strategies": sector_strategies,
          "transition_indices": transition_indices}
# result1: "relevant_to_transition" first, then "allowed_sectors".
result1 = thread_macro(models,
                       profile_filter,
                       (assoc, "profile_filter_rule", "allowed_sectors"),
                       profile_filter)
# result2: the same two rules in the opposite order.
result2 = thread_macro(models,
                       (assoc, "profile_filter_rule", "allowed_sectors"),
                       profile_filter,
                       (assoc, "profile_filter_rule", "relevant_to_transition"),
                       profile_filter)
# The two expected profiles differ only in the last entry.
expected = ["3-3-0", "3-3-1"]
fastcore.test.test_eq(result1['profiles_filtered'], expected)
fastcore.test.test_eq(result2['profiles_filtered'], expected)
fastcore.test.test_eq(result1['profiles_filtered'],
                      result2['profiles_filtered'])

Notice that in this game, transitions which affect the sector with multiple players require us to look at more profiles.

In [None]:
#| export
# Same players and sectors as in the previous test: P3 and P2 both come from S2.
allowed_sectors = {"P3": ["S2"],
                   "P2": ["S2"],
                   "P1": ["S1"]}
sector_strategies = {"S2": [2, 3],
                     "S1": [0, 1]}
# This time the transition changes the first entry of the state index (0 -> 1).
transition_indices = ["0-1", "1-1"]
n_players = 3
n_strategies = [2, 2] # this could be derived from sector_strategies or the other way round.
models = {"profile_filter_rule": "relevant_to_transition",
          "n_players": n_players,
          "n_strategies": n_strategies,
          "allowed_sectors": allowed_sectors,
          "sector_strategies": sector_strategies,
          "transition_indices": transition_indices}
# result1: "relevant_to_transition" first, then "allowed_sectors".
result1 = thread_macro(models,
                       profile_filter,
                       (assoc, "profile_filter_rule", "allowed_sectors"),
                       profile_filter)
# result2: the same two rules in the opposite order.
result2 = thread_macro(models,
                       (assoc, "profile_filter_rule", "allowed_sectors"),
                       profile_filter,
                       (assoc, "profile_filter_rule", "relevant_to_transition"),
                       profile_filter)
# Four profiles are expected: the first two entries each range over the
# strategies 2 and 3, while the last entry stays at 1.
expected = ["2-2-1", "2-3-1", "3-2-1", "3-3-1"]
fastcore.test.test_eq(result1['profiles_filtered'], expected)
fastcore.test.test_eq(result2['profiles_filtered'], expected)
fastcore.test.test_eq(result1['profiles_filtered'],
                      result2['profiles_filtered'])

Here is a method for checking that a given transition is valid.

In [None]:
#| export
def valid_transition(ind1:str, # The index of the current state, expressed in the form "{strategy_code}-{strategy_code}-{strategy_code}"
                     ind2:str,  # The index of the next state, expressed in the same form as `ind1`
                    ) -> bool: # True if the transition is valid, false otherwise
    """Check if the transition from ind1->ind2 is valid
    i.e. that exactly one population undergoes a change in strategy.

    Self-transitions return False, as do indices that describe a different
    number of populations. Raises `ValueError` if a strategy code is not an
    integer.
    """
    codes1 = list(map(int, ind1.split("-")))
    codes2 = list(map(int, ind2.split("-")))
    # `zip` stops at the shorter index, so without this guard "1-1-1" -> "2-1"
    # would be compared on its first two entries only and reported as valid.
    if len(codes1) != len(codes2):
        return False
    n_changed = sum(c1 != c2 for c1, c2 in zip(codes1, codes2))
    return n_changed == 1

**Tests for `valid_transition`**

In [None]:
#| export
# Exactly one entry changes: valid.
fastcore.test.test_eq(valid_transition("1-1-1", "2-1-1"), True)
# Two entries change: invalid.
fastcore.test.test_eq(valid_transition("1-1-1", "2-1-2"), False)
# All three entries change: invalid.
fastcore.test.test_eq(valid_transition("1-1-1", "0-0-0"), False)
# Multi-digit strategy codes are compared as integers; two entries change here.
fastcore.test.test_eq(valid_transition("1-1-1", "22-1-3"), False)
fastcore.test.test_eq(valid_transition("1-1-1", "1-1-1"), False) # Even though possible, self transitions are marked as false since we never compute them directly

We now have the methods we need for building the transition matrix for a game with an arbitrary number of sectors and various interactions between those sectors.

Such a method takes these steps:
1. For each possible transition
  - Check if the transition is valid
    - If it is a self-transition, the entry starts at 1 and the rates of leaving that state are subtracted from it
    - If it is not valid, skip it
  - Filter profiles down to only those which are relevant
  - Compute average payoffs using the payoffs and those profiles
  - Compute the fixation rate
  - Compute transition probabilities as before

**Note:** we do not yet support an arbitrary number of games with cross-learning between them. Such games are better addressed once we introduce tools from multilayer network science. In most simple cases, these games can be reframed as a single game, and then our code applies.

In [None]:
#| export
def get(m:dict, k:str):
    "Look up key `k` in dictionary `m`, giving `None` when the key is absent."
    return m[k] if k in m else None

In [None]:
#| export
def apply_profile_filters(models):
    """Run `profile_filter` once for every rule named in `models['profile_filters']`.

    When `models` has no `profile_filters` entry, the rules "allowed_sectors"
    and "relevant_to_transition" are applied, in that order. Each step receives
    the output of the previous one with `profile_filter_rule` set to the
    current rule; the output of the last step is returned.
    """
    default_rules = ["allowed_sectors",
                     "relevant_to_transition"]
    rule_names = models.get('profile_filters', default_rules)
    filtered = models
    for rule_name in rule_names:
        step_input = {**filtered, "profile_filter_rule": rule_name}
        filtered = profile_filter(step_input)
    return filtered

@multi
def compute_success(models):
    """Return the `compute_success_rule` entry of `models` (`None` if absent).

    NOTE(review): presumably `@multi` uses this value to choose which
    `@method(compute_success, ...)` implementation runs -- confirm in utils.
    """
    rule = models.get('compute_success_rule')
    return rule

def _profile_for_player(profiles, player, strategy):
    """Return the single profile in `profiles` in which `player` uses `strategy`.

    Assumes profiles look like "5-3-0" and that player "P{i}" is the i-th entry
    counted from the right, as in the `profile_filter` tests in this notebook
    -- TODO confirm against `profile_filter`.
    """
    player_number = int(player[1:])
    matches = [profile for profile in profiles
               if int(profile.split("-")[-player_number]) == strategy]
    if len(matches) > 1:
        raise NotImplementedError(
            f"{len(matches)} profiles have {player} playing strategy {strategy}; "
            "choosing between them requires a sampler over the other players.")
    if not matches:
        raise ValueError(
            f"No payoffs found for a profile in which {player} plays strategy {strategy}.")
    return matches[0]

@method(compute_success)
def compute_success(models):
    """Compute the success of the two strategies under consideration for each
    number of k mutants implied by the transition.

    Reads `transition_indices`, `payoffs`, `sector_strategies`,
    `allowed_sectors` and `Z` from `models` (after applying the profile
    filters) and returns `(ΠA, ΠB)`: two lists with one entry per
    k = 1, ..., Z-1 for the current strategy (A) and the mutant strategy (B).

    Only the case where the affected sector can take part as exactly one
    player is implemented.

    Raises `ValueError` if the transition does not change exactly one sector,
    if `allowed_sectors` never allows the affected sector, or if the payoffs
    for a required profile are missing. Raises `NotImplementedError` if the
    affected sector can take part as more than one player.
    """
    models = apply_profile_filters(models)
    ind1, ind2 = models['transition_indices']
    payoffs = models['payoffs']
    profiles_filtered = models['profiles_filtered']
    payoffs_filtered = {k:v for k,v in payoffs.items() if k in profiles_filtered}
    sector_strategies = models['sector_strategies']
    allowed_sectors = models['allowed_sectors']
    ind1_tuple = list(map(int, ind1.split("-")))
    ind2_tuple = list(map(int, ind2.split("-")))
    differ = [i1!=i2 for i1, i2 in zip(ind1_tuple, ind2_tuple)]
    if len(ind1_tuple) != len(ind2_tuple) or sum(differ) != 1:
        raise ValueError(f"Transition {ind1}->{ind2} must change the strategy of exactly one sector.")
    changed = int(np.argmax(differ))
    # Sectors are numbered from the right of the state index: the last entry
    # belongs to "S1".
    affected_sector = f"S{np.argmax(differ[::-1]) + 1}"
    # Index with the parsed integers; the raw strings also contain "-".
    current_strategy = sector_strategies[affected_sector][ind1_tuple[changed]]
    mutant_strategy = sector_strategies[affected_sector][ind2_tuple[changed]]
    relevant_players = [player
                        for player, sectors in allowed_sectors.items()
                        if affected_sector in sectors]
    # As all other sectors are fixed, we care only about the number of
    # players who can be from the affected sector, and whether that sector
    # participating as a player prevents another sector from participating as
    # that player.
    # What we do depends on the number of players.
    # TODO: Generalise code so that it can handle an arbitrary number of players.
    # Does it not also depend on how freely players can switch among the other sectors
    # as this would lead to more profiles which we could feasibly be in. In these
    # cases it matters a lot how the players are sampled. We need a sampling
    # function which samples each player from the available sectors.
    # With some probability our agent is selected to play as a suitable player
    # in a game with the other sampled players.
    # We need a sampler which places an agent as a specific player in each
    # relevant profile with some probability. The sampler function should accept
    # the relevant profiles, the affected sector, and the agent's chosen strategy.
    # It may need additional information too. Once we know how likely the player
    # is to be a given player using the given strategy in each of the relevant
    # profiles, we can easily compute the success by taking the dot product
    # of the associated payoffs and probabilities.
    # TODO: Define a sampler for 0 players, 1 players, 2 players, and N players
    if len(relevant_players) == 0:
        raise ValueError("models['allowed_sectors'] never allows affected sector. Either this transition is invalid or allowed_sectors is mistaken.")
    if len(relevant_players) > 1:
        raise NotImplementedError(
            f"Sector {affected_sector} can take part as players {relevant_players}; "
            "only a single relevant player is supported until a sampler is defined.")
    only_player = relevant_players[0]
    current_profile = _profile_for_player(payoffs_filtered, only_player, current_strategy)
    new_profile = _profile_for_player(payoffs_filtered, only_player, mutant_strategy)
    # With a single relevant player, members of the affected sector never face
    # each other, so their payoff does not depend on the number of mutants.
    πAA = πAB = payoffs_filtered[current_profile][only_player]
    πBA = πBB = payoffs_filtered[new_profile][only_player]
    # `Z` is looked up per sector; a plain number is used for every sector.
    Z = models['Z']
    if hasattr(Z, "__getitem__"):
        Z = Z[affected_sector]
    ΠA = [πAA*(Z-k-1)/(Z-1) + πAB*k/(Z-1)
          for k in range(1, Z)]
    ΠB = [πBA*(Z-k)/(Z-1)  + πBB*(k-1)/(Z-1)
          for k in range(1, Z)]
    return ΠA, ΠB

In [None]:
#|export
#| hide
@method(build_transition_matrix, 'multiple-populations-v2')
def build_transition_matrix(models:dict # A dictionary that contains the parameters in `ModelTypeEGT`
                           ):
    """Build a transition matrix between all monomorphic states
    when there are multiple populations.

    Reads `recurrent_state_space`, `β` and `payoffs` from `models` and returns
    `models` with an added `transition_matrix` of shape
    (n_models, n_states, n_states). Each valid transition gets the fixation
    rate divided by the number of valid transitions out of the current state;
    the diagonal holds one minus the sum of those entries.
    """
    S, β = [models[k] for k in ['recurrent_state_space', 'β']]
    π = models['payoffs']
    n_states = len(S)
    rates = {}
    for row_ind, current_state in enumerate(S):
        # `valid_transition` is False for self-transitions, so they are skipped.
        targets = [col_ind for col_ind, new_state in enumerate(S)
                   if valid_transition(current_state, new_state)]
        # The count depends only on the current state, so compute it once per row.
        n_mutations = len(targets)
        for col_ind in targets:
            ΠA, ΠB = compute_success(assoc(models,
                                           "transition_indices",
                                           [current_state, S[col_ind]]))
            ρ = fixation_rate_stable(ΠA, ΠB, β)
            rates[(row_ind, col_ind)] = ρ / n_mutations
    # The original read `payoffs.shape[0]`, but `payoffs` is not a name in this
    # function. Array payoffs keep that leading dimension; for other payoff
    # containers it is taken from the size of the computed rates.
    # NOTE(review): assumes `fixation_rate_stable` returns one rate per model.
    if hasattr(π, "shape"):
        n_models = π.shape[0]
    else:
        n_models = max((np.size(rate) for rate in rates.values()), default=1)
    M = np.zeros((n_models, n_states, n_states))
    diagonal = np.arange(n_states)
    M[:, diagonal, diagonal] = 1
    for (row_ind, col_ind), rate in rates.items():
        M[:, row_ind, col_ind] = rate
        M[:, row_ind, row_ind] -= rate
    return {**models, 'transition_matrix':M}

In [None]:
# NOTE(review): commented-out draft, never executed. It looks up the four
# pairwise payoffs by indexing `π` as an array and then applies the same
# ΠA/ΠB formulas that appear in `compute_success`.
#         strategy_profile_indices = transition['strategy_profile_indices']
#         player_index = transition['player_index']
#         row_ind = transition['row_ind']
#         col_ind = transition['col_ind']
#         πAA = π[:, strategy_profile_indices['AA'], player_index]
#         πAB = π[:, strategy_profile_indices['AB'], player_index]
#         πBA = π[:, strategy_profile_indices['BA'], player_index]
#         πBB = π[:, strategy_profile_indices['BB'], player_index]
#         ΠA = [πAA*(Z-k-1)/(Z-1) + πAB*k/(Z-1)
#               for k in range(1, Z)]
#         ΠB = [πBA*(Z-k)/(Z-1)  + πBB*(k-1)/(Z-1)
#               for k in range(1, Z)]

In [None]:
# NOTE(review): commented-out draft, never executed. `profiles_valid` is not
# defined in the visible part of this notebook.
# # Create arbitrary payoffs for all possible player combinations
# payoffs = {}
# for i in range(6):
#     for j in range(6):
#         for k in range(6):
#             payoffs[f"{i}-{j}-{k}"] = None
# payoffs_filtered = {k:v for k,v in payoffs.items() if k in profiles_valid}

In [None]:
# Example inputs for the 'multiple-populations-v2' transition matrix.
Z = 10
β = 1
# All 8 monomorphic states of three sectors with two strategies each. The
# earlier list repeated "0-1-1" and left out "1-1-0".
recurrent_state_space = ["0-0-0", "0-0-1", "0-1-0", "0-1-1",
                         "1-0-0", "1-0-1", "1-1-0", "1-1-1"]
# Left empty here; payoffs must be filled in before a matrix can be built.
payoffs = {}
models = {"dispatch-type": "multiple-populations-v2",
          "β": β,
          "Z": Z,
          "recurrent_state_space": recurrent_state_space,
          "payoffs": payoffs,
         }

# References

In [None]:
#| hide
# Run nbdev's export step, which writes the cells marked `#| export` to the
# package's Python module(s).
import nbdev; nbdev.nbdev_export()