# Uncertain Social Dilemma

> Class for a two-state social dilemma with partially observing agents

In [None]:
#| default_exp Environments/UncertainSocialDilemma

In [None]:
#| hide
# Imports for the nbdev development environment
from nbdev.showdoc import *

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

# Example

In [None]:
from pyCRLD.Environments.UncertainSocialDilemma import UncertainSocialDilemma
from pyCRLD.Agents.POStrategyActorCritic import POstratAC
from pyCRLD.Utils import FlowPlot as fp
import numpy as np

In [None]:
# Build an example environment. (R1, T1, S1, P1) are the payoffs used in
# state 0 and (R2, T2, S2, P2) those used in state 1; pC is the probability
# that the next state is state 1; obsnoise is the observation noise level.
env = UncertainSocialDilemma(R1=5, T1=6, S1=-1, P1=0, R2=5, T2=2, S2=-1, P2=0, pC=0.5, obsnoise=0.5)
env

UncertainSocialDilemma_2_5_6_-1_0_5_2_-1_0_0.5_0.5

In state 1 ("contract"), the rewards of the two agents are given by the payoffs `R2`, `T2`, `S2` and `P2`:

In [None]:
# Reward matrices of agent 0 and agent 1 for state 1 with next state 1,
# indexed by the two agents' actions (R is indexed as R[i, s, a1, a2, s']).
env.R[0,1,:,:,1], env.R[1,1,:,:,1]

(array([[ 5., -1.],
        [ 2.,  0.]]),
 array([[ 5.,  2.],
        [-1.,  0.]]))

In [None]:
# With pC=0.5 every entry of the transition tensor equals 0.5.
env.TransitionTensor()

array([[[[0.5, 0.5],
         [0.5, 0.5]],

        [[0.5, 0.5],
         [0.5, 0.5]]],


       [[[0.5, 0.5],
         [0.5, 0.5]],

        [[0.5, 0.5],
         [0.5, 0.5]]]])

In [None]:
# Init environment and MultiAgentEnvironment-interface
"""
mae = POstratAC(env=env, learning_rates=0.1, discount_factors=0.9)

x = ([0], [0,1], [0])  # Plotting on the x-axis the [0]'s agents probability in both states [0,1] to cooprate [0]
y = ([1], [0,1], [0])  # Plotting on the y-axis the [1]'s agents probability in both states [0,1] to cooprate [0]
ax = fp.plot_strategy_flow(mae, x, y, flowarrow_points = np.linspace(0.01 ,0.99, 9), NrRandom=16)
"""

"\nmae = POstratAC(env=env, learning_rates=0.1, discount_factors=0.9)\n\nx = ([0], [0,1], [0])  # Plotting on the x-axis the [0]'s agents probability in both states [0,1] to cooprate [0]\ny = ([1], [0,1], [0])  # Plotting on the y-axis the [1]'s agents probability in both states [0,1] to cooprate [0]\nax = fp.plot_strategy_flow(mae, x, y, flowarrow_points = np.linspace(0.01 ,0.99, 9), NrRandom=16)\n"

## Implementation

In [None]:
#| export
from pyCRLD.Environments.Base import ebase
from pyCRLD.Utils.Helpers import make_variable_vector

from fastcore.utils import *
from fastcore.test import *

from typing import Iterable
import numpy as np

In [None]:
#| export
class UncertainSocialDilemma(ebase):
    """Two-agent, two-state social dilemma with noisy state observations.

    Each state carries its own 2x2 payoff set: ``R1, T1, S1, P1`` are used
    in state 0 and ``R2, T2, S2, P2`` in state 1. The next state is drawn
    independently of the current state and of the chosen actions: state 1
    with probability ``pC`` and state 0 with probability ``1 - pC``.

    Parameters
    ----------
    R1, T1, S1, P1
        Payoffs entering the reward tensor for state 0.
    R2, T2, S2, P2
        Payoffs entering the reward tensor for state 1.
    pC
        Probability that the next state is state 1.
    obsnoise
        Observation noise. Either a single number, applied to both agents,
        or an iterable of exactly two numbers, one per agent. Values must
        be non-negative; the observation tensor caps each value at 0.5.

    Raises
    ------
    AssertionError
        If ``obsnoise`` is an iterable whose length is not 2, or if any
        noise value is negative.
    """

    def __init__(self, R1, T1, S1, P1, R2, T2, S2, P2, pC, obsnoise):
        self.N = 2  # number of agents
        self.M = 2  # number of actions per agent
        self.Z = 2  # number of states

        # Payoffs used in state 0.
        self.R1 = R1
        self.T1 = T1
        self.S1 = S1
        self.P1 = P1

        # Payoffs used in state 1.
        self.R2 = R2
        self.T2 = T2
        self.S2 = S2
        self.P2 = P2

        self.pC = pC  # probability that the next state is state 1

        # Normalise the noise to one entry per agent. Every other method
        # reads `self.noise`, so both branches must store it under that name
        # (the iterable branch previously wrote to `self.obsnoise`, which
        # made the check below fail with an AttributeError).
        if hasattr(obsnoise, "__iter__"):
            assert len(obsnoise) == 2, "obsnoise needs one value per agent"
            self.noise = np.array(obsnoise)
        else:
            self.noise = np.array([obsnoise, obsnoise])
        assert min(self.noise) >= 0.0, "obsnoise must be non-negative"

        # -- tensors are built from the attributes set above
        self.T = self.TransitionTensor()
        self.R = self.RewardTensor()
        self.O = self.ObservationTensor()
        self.state = 1  # initial state
        super().__init__()

In [None]:
#| export
@patch
def actions(self:UncertainSocialDilemma):
    """Return the action indices together with their human-readable labels."""
    indices = [0, 1]
    labels = ["coop.", "defect."]
    return indices, labels

In [None]:
#| export
@patch
def states(self:UncertainSocialDilemma):
    """Return the state indices together with their human-readable labels."""
    indices = [0, 1]
    labels = ["no contract", "contract"]
    return indices, labels


In [None]:
#| export
@patch
def TransitionTensor(self:UncertainSocialDilemma):
        """Get the Transition Tensor."""
        Tsas = np.ones((2, 2, 2, 2)) * (-1)

        Tsas[:, :, :, 0] = 1-self.pC
        Tsas[:, :, :, 1] = self.pC

        return Tsas

In [None]:
#| export
@patch
def RewardTensor(self:UncertainSocialDilemma):
        """Get the Reward Tensor R[i,s,a1,...,aN,s']."""

        R = np.zeros((2, 2, 2, 2, 2))

        R[0, 0, :, :, 0] = [[self.R1, self.S1],
                            [self.T1, self.P1]]
        R[1, 0, :, :, 0] = [[self.R1, self.T1],
                            [self.S1, self.P1]]
        R[:, 0, :, :, 1] = R[:, 0, :, :, 0]

        R[0, 1, :, :, 1] = [[self.R2, self.S2],
                            [self.T2, self.P2]]
        R[1, 1, :, :, 1] = [[self.R2, self.T2],
                            [self.S2, self.P2]]
        R[:, 1, :, :, 0] = R[:, 1, :, :, 1]

        return R

In [None]:
#| export
@patch
def ObservationTensor(self:UncertainSocialDilemma):

        if np.all(self.noise > 0.5):
            self.Q = 1
            Oiso = np.ones((self.N, self.Z, self.Q))
            
        else:
            self.Q = self.Z
            Oiso = np.zeros((self.N, self.Z, self.Q))

            for i in range(self.N):
                Oiso[i,0,0] = 1 - min(self.noise[i], 0.5)
                Oiso[i,0,1] = 0 + min(self.noise[i], 0.5)
                Oiso[i,1,0] = 0 + min(self.noise[i], 0.5)
                Oiso[i,1,1] = 1 - min(self.noise[i], 0.5)
            
        return Oiso

In [None]:
#| export
@patch
def id(self:UncertainSocialDilemma):
    """
    Returns id string of environment
    """
    # Default
    R1 = self.R1
    T1 = self.T1
    S1 = self.S1
    P1 = self.P1 
    R2 = self.R1 
    T2 = self.T2
    S2 = self.S2
    P2 = self.P2 
    pC = self.pC 
    noise = self.noise if len(np.unique(self.noise))>1 else self.noise[0]

    id = f"{self.__class__.__name__}_"+\
        f"{self.N}_{str(R1)}_{str(T1)}_{str(S1)}_{str(P1)}_{str(R2)}_{str(T2)}_{str(S2)}_{str(P2)}_{str(pC)}_{str(noise)}"
    return id

In [None]:
#| hide
# Run nbdev's export step for this notebook project.
import nbdev; nbdev.nbdev_export()