In [73]:
import random
from collections import namedtuple

# Set up Problem Parameters

In [74]:
N_QUESTIONS = 5
transition_probs = [1,0.8,0.6,0.5,0.3]
prize = [0,10,100,1000,2000,5000]

# Set Up Environment

In [75]:
# state represent by question number. 0 for start of game, arbitrary placeholder. -1 for end of game.
State = namedtuple('State', ['question_number'])
Transition = namedtuple('Transition', ['prob', 'next_state'])
Result = namedtuple('Result', ['action', 'value'])

In [76]:
# Possible Actions

In [77]:
def possible_actions(curr_state: State) -> list[str]:
    """returns possible actions
    """
    if curr_state.question_number == 0:
        return ['Y']
    elif curr_state.question_number == -1:
        return ['N']
    else:
        return ['Y','N']

# Bellman Equation

In [78]:
N_QUESTIONS = 5
transition_probs = [1,0.8,0.6,0.5,0.3]
prize = [0,10,100,1000,2000,5000]
def create_lookup_table(n_questions:int):
    return [Result(None,0) for _ in range(n_questions+2)]
x = create_lookup_table(N_QUESTIONS)


In [79]:
# test for lookup table
for i, question in enumerate(x):
        print(f"{(i,question)}")

(0, Result(action=None, value=0))
(1, Result(action=None, value=0))
(2, Result(action=None, value=0))
(3, Result(action=None, value=0))
(4, Result(action=None, value=0))
(5, Result(action=None, value=0))
(6, Result(action=None, value=0))


In [80]:
lookup = create_lookup_table(N_QUESTIONS)
def bellman_equation(start_state: State):
    """
    """
    question_number = start_state.question_number
    # previously solved
    if lookup[question_number].action != None:
        return lookup[question_number]
    # base case
    if start_state.question_number <= -1:
        return Result('',0)
    # list out possible actions
    actions_poss = possible_actions(start_state) 
    # get the the expected value of each action which is the Immediate value + expected value of future states
    actions_vals = [expected_action_value(start_state, action) for action in actions_poss]
    # find the max value
    best_val, best_action = max(zip(actions_vals, actions_poss))
    # storing it in cache
    lookup[question_number] = Result(best_action, best_val)
    return lookup[question_number]

def expected_action_value(curr_state: State,action: str):
    # get the the expected value of each action 
    if action == 'N':
        return 0

    # which is the Immediate value + expected value of future states
    immediate_value = prize[curr_state.question_number]
    expected_value = immediate_value
    prob_success = transition_probs[curr_state.question_number]
    if curr_state.question_number != N_QUESTIONS:
        success_state = State(-1)
    else:
        success_state = State(curr_state.question_number + 1)
    failure_state = State(-1)
    transition_states = [Transition(prob = prob_success,next_state = success_state),
                         Transition(prob = 1-prob_success,next_state = failure_state)
                        ]
    for transition in transition_states:
        expected_value += transition.prob * bellman_equation(transition.next_state).value
    return expected_value

In [82]:
N_QUESTIONS = 5
transition_probs = [1,0.8,0.6,0.5,0.3]
prize = [0,10,100,1000,2000,5000]
start_state = State(0)
lookup = create_lookup_table(N_QUESTIONS)

bellman_equation(start_state)
lookup

[Result(action='Y', value=0),
 Result(action=None, value=0),
 Result(action=None, value=0),
 Result(action=None, value=0),
 Result(action=None, value=0),
 Result(action=None, value=0),
 Result(action=None, value=0)]