# Risk Reward Dilemma
> This class models a two-state social dilemma in which a single agent chooses between `risky` and `cautious` actions. The action taken by the agent determines the probability of transitioning between the `degraded` and `prosperous` states. In each state, the agent receives different rewards, reflecting the consequences of its chosen action.

In [None]:
#| default_exp Environments/RiskReward

In [None]:
#| hide
# Imports for the nbdev development environment
from nbdev.showdoc import *

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Implementation

In [None]:
#| export
from pyCRLD.Environments.Base import ebase

from fastcore.utils import *
from fastcore.test import *

import numpy as np

In [None]:
#| export
class RiskReward(ebase):
    """
    Single-agent MDP for decision-making under uncertainty.

    The environment has two states (index 0: prosperous, index 1: degraded)
    and two actions (index 0: cautious, index 1: risky).
    """

    def __init__(self, pc:float, pr:float, rs:float, rr:float, rd:float):
        # Transition parameters:
        #   pc - collapse probability when acting risky in the prosperous state
        #   pr - recovery probability when acting cautious in the degraded state
        self.pc, self.pr = pc, pr

        # Reward parameters:
        #   rs - reward for staying prosperous while cautious
        #   rr - reward for staying prosperous while risky
        #   rd - reward associated with the degraded state
        self.rs, self.rr, self.rd = rs, rr, rd

        # Environment dimensions: one agent, two actions, two states.
        self.N, self.M, self.Z = 1, 2, 2

        # The environment starts in the prosperous state (index 0).
        self.state = 0

        # Base initialisation runs last, after all attributes are set, as in
        # the original ordering. NOTE(review): presumably the base class reads
        # these attributes during its setup - confirm against `ebase`.
        super().__init__()

   

In [None]:
#| export
@patch
def TransitionTensor(self:RiskReward):
    """
    Build the transition tensor of the MDP.

    Returns an array of shape (Z, M, Z) indexed as
    [state, action, next state]. Entries not listed in the table
    below stay at zero.
    """
    # Index convention: state 0 = prosperous, state 1 = degraded;
    # action 0 = cautious, action 1 = risky.
    probabilities = {
        (0, 0, 0): 1,            # prosperous + cautious: stays prosperous
        (0, 1, 0): 1 - self.pc,  # prosperous + risky: may stay prosperous
        (0, 1, 1): self.pc,      # prosperous + risky: may collapse
        (1, 0, 0): self.pr,      # degraded + cautious: may recover
        (1, 0, 1): 1 - self.pr,  # degraded + cautious: may stay degraded
        (1, 1, 1): 1,            # degraded + risky: stays degraded
    }

    tensor = np.zeros((self.Z, self.M, self.Z))
    for index, probability in probabilities.items():
        tensor[index] = probability
    return tensor




In [None]:
#| export
@patch
def RewardTensor(self:RiskReward):
    """
    Build the reward tensor of the MDP.

    Returns an array of shape (N, Z, M, Z) indexed as
    [agent, state, action, next state]. Entries that are never
    assigned stay at zero.
    """
    tensor = np.zeros((self.N, self.Z, self.M, self.Z))

    # Rewards for transitions that start in the prosperous state (index 0).
    from_prosperous = {
        (0, 0, 0, 0): self.rs,  # cautious, stays prosperous
        (0, 0, 1, 0): self.rr,  # risky, stays prosperous
        (0, 0, 1, 1): self.rd,  # risky, collapses to degraded
    }
    for index, reward in from_prosperous.items():
        tensor[index] = reward

    # Every transition that starts in the degraded state (index 1) yields rd,
    # regardless of action and next state.
    tensor[0, 1] = self.rd
    return tensor



In [None]:
#| export
@patch
def actions(self:RiskReward):
        """
        Define the actions available in the MDP.
        """
        return [['cautious', 'risky']]



In [None]:
#| export
@patch
def states(self:RiskReward):
        """
        Define the states of the MDP.
        """
        return ['prosperous', 'degraded']



In [None]:
#| export
@patch
def id(self:RiskReward):
        """
        Provide an identifier for the environment.
        """
        return f"{self.__class__.__name__}_pc{self.pc}_pr{self.pr}_rs{self.rs}_rr{self.rr}_rd{self.rd}"

# Example

In [None]:
# Example environment; arguments are given in the order pc, pr, rs, rr, rd.
env = RiskReward(
    pc=0.3,    # collapse probability when risky in the prosperous state
    pr=0.1,    # recovery probability when cautious in the degraded state
    rs=0.6,    # reward for staying prosperous while cautious
    rr=0.8,    # reward for staying prosperous while risky
    rd=0.001,  # reward in the degraded state
)

In [None]:
env.id()

'RiskReward_pc0.3_pr0.1_rs0.6_rr0.8_rd0.001'

In [None]:
# Axes of the displayed array are (state, action, next state).
env.TransitionTensor()

array([[[1. , 0. ],
        [0.7, 0.3]],

       [[0.1, 0.9],
        [0. , 1. ]]])

In [None]:
# Index 0 selects the single agent; the remaining axes are
# (state, action, next state).
env.RewardTensor()[0]

array([[[0.6  , 0.   ],
        [0.8  , 0.001]],

       [[0.001, 0.001],
        [0.001, 0.001]]])

In [None]:
env.actions()

[['cautious', 'risky']]

In [None]:
env.states()

['prosperous', 'degraded']

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()