# Multiple Observation SocialDilemma

> Class for a symmetric two-agent stateless social dilemma environment

Typical examples are the *Prisoner's Dilemma*, the *Stag Hunt*, and the game of *Chicken* (also known as *Snowdrift* or *Hawk-Dove*).

In [1]:
#| default_exp Environments/MultipleObsSocialDilemma

In [2]:
#| hide
# Imports for the nbdev development environment
from nbdev.showdoc import *

In [3]:
#| hide
%load_ext autoreload
%autoreload 2

In [4]:
#| export
from pyCRLD.Environments.Base import ebase

from fastcore.utils import *
from fastcore.test import *

from pyCRLD.Environments.MultipleObservationsEnv import MultipleObservationsEnv

import numpy as np

In [5]:
#| export
class MultipleObsSocialDilemma(MultipleObservationsEnv):
    """
    Symmetric two-agent, two-action social dilemma matrix game.

    The environment has a single state; the four payoff parameters are
    stored on the instance as attributes of the same name.
    """

    def __init__(self,
                 reward:float,  # payoff when both agents cooperate
                 temptation:float,  # payoff for defecting while the other agent cooperates
                 suckers_payoff:float,  # payoff for cooperating while the other agent defects
                 punishment:float): # payoff when both agents defect
        # Fixed dimensions of the game and its single (initial) state.
        # TODO: the parent class expects these attributes to exist already
        # (its initializer depends on them), so they are set here, before
        # super().__init__() is called.
        self.n_agents, self.n_agent_actions, self.n_states = 2, 2, 1
        self.state = 0  # initial state

        # Payoff parameters of the dilemma.
        self.reward, self.temptation = reward, temptation
        self.suckers_payoff, self.punishment = suckers_payoff, punishment

        super().__init__()

In [6]:
#| export
@patch
def transition_tensor(self:MultipleObsSocialDilemma):
    """
    Build the transition tensor.

    Returns an array of ones with shape
    (n_states, n_agent_actions, n_agent_actions, n_states).
    """
    n_s, n_a = self.n_states, self.n_agent_actions
    tensor_shape = (n_s, n_a, n_a, n_s)
    return np.ones(tensor_shape)

@patch
def reward_tensor(self:MultipleObsSocialDilemma):
    """
    Build the reward tensor R[i,s,a1,a2,s'].

    Action index 0 is 'c' and index 1 is 'd' (see `actions`). Only the
    entries for state 0 -> state 0 are filled; everything else stays zero.
    """
    r, t = self.reward, self.temptation
    s, p = self.suckers_payoff, self.punishment

    # One 2x2 payoff matrix per agent, indexed [a1, a2].
    payoff_matrices = (
        [[r, s],
         [t, p]],  # agent 0
        [[r, t],
         [s, p]],  # agent 1
    )

    R = np.zeros((2, self.n_states, 2, 2, self.n_states))
    for agent, matrix in enumerate(payoff_matrices):
        R[agent, 0, :, :, 0] = matrix
    return R

@patch
def actions(self:MultipleObsSocialDilemma):
    """Return the action labels, one fresh ['c', 'd'] list per agent."""
    labels = ('c', 'd')
    return [list(labels) for _ in range(self.n_agents)]

@patch
def states(self:MultipleObsSocialDilemma):
    """Return the state labels: a single state, labelled '.'."""
    state_labels = ['.']
    return state_labels

@patch
def id(self:MultipleObsSocialDilemma):
    """
    Return the identifier string of the environment.

    The string is the class name followed by temptation, reward,
    punishment and sucker's payoff, all joined by underscores.
    """
    params = (self.temptation, self.reward, self.punishment, self.suckers_payoff)
    suffix = "_".join(f"{value}" for value in params)
    return f"{self.__class__.__name__}_" + suffix

### Example

In [7]:
env = MultipleObsSocialDilemma(reward=1, temptation=2, suckers_payoff=-1, punishment=0)

In [8]:
env.id()

'MultipleObsSocialDilemma_2_1_0_-1'

In [9]:
env

MultipleObsSocialDilemma_2_1_0_-1

Reward matrix of agent `0`:

In [10]:
env.reward_tensor()[0,0,:,:,0]

array([[ 1., -1.],
       [ 2.,  0.]])

Reward matrix of agent `1`:

In [11]:
env.reward_tensor()[1,0,:,:,0]

array([[ 1.,  2.],
       [-1.,  0.]])

In [12]:
env.transition_tensor()

array([[[[1.],
         [1.]],

        [[1.],
         [1.]]]])

In [13]:
env.actions()

[['c', 'd'], ['c', 'd']]

In [14]:
env.states()

['.']

In [15]:
#| hide
import nbdev; nbdev.nbdev_export()