# Markov Decision Process

In [1]:
import numpy as np

In [2]:
class GameShow:
    """Quiz-show environment modelled as a small Markov decision process.

    The contestant starts at question ``'Q1'``.  At every step they either
    quit (action ``0``) and move to ``'end'``, or play (action ``1``).  A
    played question succeeds with the probability stored in ``probs`` and
    advances to the next entry of ``states``; otherwise the contestant moves
    to ``'lost'`` and the accumulated return is wiped.

    Attributes:
        states: Ordered state names; a success moves one position forward.
        actions: Mapping from action name to the integer passed to ``play``.
        rewards: Reward added to ``currentReturn`` when a state is entered.
        probs: Success probability of playing from each question state.
        currentState: Name of the state the game is currently in.
        currentReturn: Sum of rewards collected in the current episode.
        rng: NumPy random generator used to sample question outcomes.
    """

    # States from which no further move is possible.
    TERMINAL_STATES = ('end', 'lost')

    def __init__(self):
        # Order matters: a successful 'play' moves to the next list entry,
        # so succeeding at 'Q4' leads to 'end'.
        self.states = ['Q1', 'Q2', 'Q3', 'Q4', 'end', 'lost']

        self.actions = {'play': 1, 'quit': 0}

        # NOTE(review): the reward is added for the state that is *entered*
        # (see play), so rewards['Q1'] is never collected through play() --
        # 'Q1' is only reached via resetGame().  Confirm this is intended.
        self.rewards = {'Q1': 100, 'Q2': 1000, 'Q3': 10000, 'Q4': 50000, 'end': 0, 'lost': 0}

        self.probs = {'Q1': 0.9, 'Q2': 0.75, 'Q3': 0.5, 'Q4': 0.1}

        self.currentState = 'Q1'
        self.currentReturn = 0

        self.rng = np.random.default_rng()

    def resetGame(self):
        """Start a new episode: back to ``'Q1'`` with a return of zero."""
        self.currentState = 'Q1'
        self.currentReturn = 0

    def play(self, action):
        """Apply one action and update ``currentState`` and ``currentReturn``.

        Args:
            action: ``1`` to answer the current question, ``0`` to quit.

        Raises:
            ValueError: If ``action`` is not one of the values in ``actions``.
        """
        # Both 'end' and 'lost' are terminal.  Without this guard, playing
        # from 'lost' looked up a non-existent success probability (KeyError)
        # and quitting from 'lost' silently turned a loss into 'end'.
        if self.currentState in self.TERMINAL_STATES:
            print('You cannot play anymore.')
            return

        if action == self.actions['quit']:
            self.currentState = 'end'
        elif action == self.actions['play']:
            if self.rng.random() < self.probs[self.currentState]:
                # Success: advance to the next state in the ordered list.
                newIndex = self.states.index(self.currentState) + 1
                self.currentState = self.states[newIndex]
            else:
                # Failure: everything collected so far is forfeited.
                self.currentState = 'lost'
                self.currentReturn = 0
        else:
            # An unknown action used to fall through and re-add the reward
            # of the unchanged state; reject it instead.
            raise ValueError(
                f'Unknown action {action!r}; expected one of {sorted(self.actions.values())}.'
            )

        # Collect the reward attached to the state that was just entered.
        self.currentReturn += self.rewards[self.currentState]


In [3]:
# Instantiate the game environment; it starts in state 'Q1' with a return of 0
myShow = GameShow()

In [4]:
# Show where the game stands before any question has been answered
print(myShow.currentState, myShow.currentReturn)

# Ten consecutive decisions, always choosing to keep playing
for _ in range(10):
    myShow.play(action=1)
    print(myShow.currentState, myShow.currentReturn)

    # Still mid-game: move straight on to the next question
    if myShow.currentState not in ('end', 'lost'):
        continue

    # The episode is over, so start a fresh one and show the reset values
    myShow.resetGame()
    print(myShow.currentState, myShow.currentReturn)

Q1 0
Q2 1000
Q3 11000
Q4 61000
lost 0
Q1 0
Q2 1000
Q3 11000
Q4 61000
lost 0
Q1 0
Q2 1000
Q3 11000


In [5]:
class Agent:
    """Player that randomly decides, per state, whether to play or quit.

    Attributes:
        task: The game being played.  It must expose ``states``,
            ``currentState``, ``currentReturn``, ``play(action)`` and
            ``resetGame()``.
        rng: NumPy random generator used to sample the agent's decisions.
        predMat: 1-D array indexed by the position of the current state in
            ``task.states``; each entry is the probability of choosing
            action ``1`` (play) in that state.
    """

    def __init__(self, task, initPredMat=True):
        """Create an agent for ``task``.

        Args:
            task: The game object the agent interacts with.
            initPredMat: ``True`` (the default), ``False`` or ``None`` to use
                a play-probability of 0.5 for every state, or an array-like
                of per-state play probabilities to use instead.
        """
        self.task = task
        self.rng = np.random.default_rng()

        # Test for the flag values explicitly.  A plain truthiness check
        # discarded a supplied list (truthy), raised ValueError on a NumPy
        # array (ambiguous truth value) and stored a bare bool for False.
        if initPredMat is None or isinstance(initPredMat, (bool, np.bool_)):
            # Five entries cover the state indices 0-4.
            # NOTE(review): with GameShow's six states, index 5 ('lost') has
            # no entry; playGame resets terminal states before the next
            # decision, so it is not looked up there -- confirm if reused.
            self.predMat = np.full(5, 0.5)
        else:
            self.predMat = np.asarray(initPredMat, dtype=float)

    def playGame(self, T, out=True):
        """Let the agent take ``T`` decisions in the task.

        Whenever the task reaches ``'end'`` or ``'lost'`` it is reset, so a
        single call may span several episodes.

        Args:
            T: Number of decisions (calls to ``task.play``) to make.
            out: If true, print the task's state and return before the first
                decision and after every decision.
        """
        t = 0
        if out:
            print(self.task.currentState, self.task.currentReturn)

        while t < T:
            stateId = self.task.states.index(self.task.currentState)

            # Play with probability predMat[stateId], otherwise quit.
            cond = self.rng.random() < self.predMat[stateId]
            action = 1 if cond else 0
            self.task.play(action)

            if out:
                print(self.task.currentState, self.task.currentReturn)

            # The reset happens after printing, so the fresh 'Q1' state
            # itself is not echoed.
            if self.task.currentState in ['end', 'lost']:
                self.task.resetGame()

            t += 1

In [6]:
# Create an agent bound to myShow; by default it plays or quits with probability 0.5 in every state
player = Agent(myShow)

In [7]:
myShow.resetGame()  # start from 'Q1' with a return of 0
player.playGame(100)  # let the agent take 100 decisions, printing the state and return after each

Q1 0
end 0
Q2 1000
Q3 11000
lost 0
end 0
Q2 1000
end 1000
end 0
end 0
end 0
end 0
Q2 1000
Q3 11000
end 11000
Q2 1000
end 1000
Q2 1000
Q3 11000
end 11000
Q2 1000
end 1000
end 0
end 0
end 0
end 0
Q2 1000
Q3 11000
end 11000
end 0
end 0
Q2 1000
Q3 11000
lost 0
Q2 1000
end 1000
Q2 1000
lost 0
Q2 1000
lost 0
end 0
end 0
end 0
Q2 1000
end 1000
Q2 1000
end 1000
end 0
Q2 1000
Q3 11000
Q4 61000
end 61000
Q2 1000
end 1000
Q2 1000
lost 0
end 0
Q2 1000
Q3 11000
Q4 61000
lost 0
end 0
end 0
end 0
Q2 1000
lost 0
Q2 1000
Q3 11000
lost 0
Q2 1000
Q3 11000
end 11000
end 0
Q2 1000
Q3 11000
end 11000
end 0
end 0
Q2 1000
end 1000
end 0
Q2 1000
end 1000
Q2 1000
Q3 11000
lost 0
Q2 1000
end 1000
end 0
end 0
end 0
end 0
end 0
Q2 1000
end 1000
Q2 1000
end 1000
Q2 1000
end 1000
lost 0
end 0
