# Social Dilemma

> Class for a symmetric two-agent stateless social dilemma environment

Typical examples are the *Prisoner's Dilemma*, *Stag Hunt* game, and the game of *chicken*/*snowdrift*/*hawk-dove*.

In [1]:
#| default_exp Environments/MultipleObsSocialDilemma

In [2]:
#| hide
# Imports for the nbdev development environment
from nbdev.showdoc import *

In [3]:
#| hide
%load_ext autoreload
%autoreload 2

In [4]:
#| export
from pyCRLD.Environments.Base import ebase

from fastcore.utils import *
from fastcore.test import *

from pyCRLD.Environments.MultipleObservationsEnv import MultipleObservationsEnv

import numpy as np

In [5]:
#| export
class MultipleObsSocialDilemma(MultipleObservationsEnv):
    """
    Symmetric 2-agent 2-action Social Dilemma Matrix Game.
    """ 

    def __init__(self,
                 reward:float,  # reward of mutual cooperation
                 temptation:float,  # temptation of unilateral defection 
                 suckers_payoff:float,  # sucker's payoff of unilateral cooperation
                 punishment:float): # punsihment of mutual defection


        self.reward = reward
        self.temptation = temptation
        self.suckers_payoff = suckers_payoff
        self.punishment = punishment

        # TODO: these variables are expected to be already initialized in the parent class
        # causing a recursive calling and causing this initialization to fail
        self.n_agents = 2
        self.n_agent_actions = 2
        self.n_states = 1

        self.state = 0 # inital state
        super().__init__()

In [6]:
#| export
@patch

@patch
def transition_tensor(self:SocialDilemma):
    """Calculate the Transition Tensor"""
    return np.ones((self.n_states, self.n_agent_actions, self.n_agent_actions, self.n_states))

@patch
def reward_tensor(self:SocialDilemma):
    """Get the Reward Tensor R[i,s,a1,...,aN,s']."""

    R = np.zeros((2, self.n_states, 2, 2, self.n_states))

    R[0, 0, :, :, 0] = [[self.reward , self.suckers_payoff],
                        [self.temptation , self.punishment]]
    R[1, 0, :, :, 0] = [[self.reward , self.temptation],
                        [self.suckers_payoff , self.punishment]]

    return R

# %% ../../nbs/Environments/10_EnvSocialDilemma.ipynb 9
@patch
def actions(self:SocialDilemma):
    """The action sets"""
    return [['c', 'd'] for _ in range(self.n_agents)]

# %% ../../nbs/Environments/10_EnvSocialDilemma.ipynb 10
@patch
def states(self:SocialDilemma):
    """The states set"""
    return ['.'] 

# %% ../../nbs/Environments/10_EnvSocialDilemma.ipynb 11
@patch
def id(self:SocialDilemma):
    """
    Returns id string of environment
    """
    # Default
    id = f"{self.__class__.__name__}_"+\
        f"{self.temptation}_{self.reward}_{self.punishment}_{self.suckers_payoff}"
    return id

NameError: name 'SocialDilemma' is not defined

### Example

In [None]:
env = SocialDilemma(reward=1, temptation=2, suckers_payoff=-1, punishment=0)

In [None]:
env.id()

In [None]:
env

Reward matrix of agent `0`:

In [None]:
env.reward_tensor()[0,0,:,:,0]

Reward matrix of agent `1`:

In [None]:
env.reward_tensor()[1,0,:,:,0]

In [None]:
env.transition_tensor()

In [None]:
env.actions()

In [None]:
env.states()

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()