# Markov Knapsack

Let's take a knapsack problem with uncertain costs: each time you decide to include an item, we flip a fair coin, and the outcome determines which of the item's two possible costs you pay. Excluding an item costs nothing.

In [26]:
import random
from collections import namedtuple

# Problem data: one entry per item, indexed by State.index.
values = [40, 30, 30, 30, 50]   # reward collected when an item is taken
costs1 = [1, 2, 3, 4, 5]        # cost charged under the first coin outcome
costs2 = [5, 4, 3, 2, 1]        # cost charged under the second coin outcome
budget = 10                     # budget available in the initial state

# Derive the item count from the data instead of hard-coding it, so editing the
# lists above cannot leave `n` out of sync with them.
n = len(values)
assert len(costs1) == n and len(costs2) == n, "values, costs1 and costs2 must have the same length"


We need a few building blocks for our environment. The most important one is the state, because we are going to solve the problem with the Bellman equation:

$V(s) = \max_{a} \left[ F(s,a) + \sum_{s'} P(s' \mid s, a)\, V(s') \right]$

In [27]:
# State: which item is being decided (`index`) and how much budget is left.
State = namedtuple('State', ('index', 'budget'))
# Transition: one possible successor state and the probability of reaching it.
Transition = namedtuple('Transition', ('prob', 'next_state'))
# Result: the best action found for a state and the expected value it yields.
Result = namedtuple('Result', ('action', 'value'))

In [28]:
# Sanity check: a State's `index` is used to look up that item's cost in each list.
test_state = State(3,5)
costs1[test_state.index]  # value is discarded: a notebook cell only displays its last expression
costs2[test_state.index]

2

In [29]:
# Sanity check for the Transition tuple: build one by hand (probability 0.5 of
# moving to item 4 with budget 3) and read its successor state back.
t = Transition(0.5,State(4,3))
t.next_state

State(index=4, budget=3)

In [30]:
# What actions do we have available
def possible_actions(state):
    """Return the actions that may be chosen in `state`.

    'N' skips the current item and 'Y' takes it. 'Y' is only offered when the
    remaining budget covers the larger of the item's two possible costs.
    Returns None when `state.index` is past the last item.
    """
    item = state.index
    if item >= len(costs1):
        return None
    worst_case_cost = max(costs1[item], costs2[item])
    return ['N'] if state.budget < worst_case_cost else ['N', 'Y']

def possible_actions_test():
    """Check possible_actions on a few hand-picked states.

    Prints a message for every mismatch; prints nothing when all cases agree.
    """
    cases = [
        (State(4,0), ['N']),
        (State(4,20), ['N','Y']),
        (State(4,4), ['N']),
        (State(4,5), ['N','Y']),
        (State(5,0), None),
    ]
    # Evaluate every case up front, then report the ones that disagree.
    outcomes = [possible_actions(case) for case, _ in cases]
    for (case, expected), actual in zip(cases, outcomes):
        if expected != actual:
            print(f"Error for case {case}. \nExpected result was {expected}. \nActual result was {actual}")
possible_actions_test()

In [31]:
# Show the problem data side by side: item values and the two cost lists.
print(values)
print(costs1)
print(costs2)

[40, 30, 30, 30, 50]
[1, 2, 3, 4, 5]
[5, 4, 3, 2, 1]


In [32]:
def immediate_value(state, action):
    """Return the reward earned right now for taking `action` in `state`.

    Skipping ('N') earns nothing; any other action earns the value of the
    item at `state.index`.
    """
    return 0 if action == 'N' else values[state.index]

In [33]:
def transitions(state, action):
    """List the successor states of `state` under `action` with their probabilities.

    Skipping ('N') moves to the next item with the budget untouched, with
    certainty. Any other action moves to the next item and charges either the
    `costs1` or the `costs2` price of the current item, each with probability 0.5.
    """
    following = state.index + 1
    if action == 'N':
        # Nothing is paid, so there is a single certain outcome.
        return [Transition(1, State(following, state.budget))]
    return [
        Transition(0.5, State(following, state.budget - cost_table[state.index]))
        for cost_table in (costs1, costs2)
    ]

In [34]:
def expected_action_value(state, action):
    """Return the expected total value of taking `action` in `state`.

    This is the immediate reward plus the probability-weighted value of every
    successor state, where each successor is solved recursively via `knapsack`.
    """
    reward_now = immediate_value(state, action)
    reward_later = sum(
        outcome.prob * knapsack(outcome.next_state).value
        for outcome in transitions(state, action)
    )
    return reward_now + reward_later

In [35]:
# Memo table mapping State -> Result. It lives at module level, so entries
# persist between calls to knapsack().
lookup = {}
def knapsack(state):
    """Return the best action and its expected value for `state`.

    Once every item has been decided (`state.index >= n`) the result is an
    empty action with value 0. Otherwise each available action is scored with
    `expected_action_value` and the highest-scoring one is cached in `lookup`
    and returned.
    """
    if state.index >= n:
        return Result('', 0)
    if state in lookup:
        return lookup[state]
    candidates = possible_actions(state)
    # Pair each score with its action so max() picks the best value; on equal
    # values the tuple comparison falls through to the action string.
    scored = [(expected_action_value(state, act), act) for act in candidates]
    top_value, top_action = max(scored)
    lookup[state] = Result(top_action, top_value)
    return lookup[state]


In [37]:

# Echo the problem data so the result below can be read in context.
print(f'Costs1: {costs1}')
print(f'Costs2: {costs2}')
print(f'Values: {values}')
print(f'Budget: {budget}')

# Solve from the first item with the full budget; this call also fills `lookup`
# for every state the recursion visits.
init_state = State(0, budget)
opt = knapsack(init_state)
print(f'Optimal: {opt}')

Costs1: [1, 2, 3, 4, 5]
Costs2: [5, 4, 3, 2, 1]
Values: [40, 30, 30, 30, 50]
Budget: 10
Optimal: Result(action='Y', value=107.5)


In [38]:
# Inspect the cached result for one state the solver visited. This reads
# `lookup` directly, so it raises KeyError unless knapsack() has already been
# run on a state that leads here.
second_state=State(1,5)
lookup[second_state]

Result(action='N', value=50.0)

In [41]:
# for state,res in lookup.items():
#     print(state, res)

# Roll the optimal policy forward 20 times from the initial state, printing
# the state and chosen action at every step.
for i in range(20):
    print(f'sim {i}:\n')
    state = init_state
    for _ in range(n):
        res = knapsack(state)
        print(state, res)
        # Draw the successor according to its transition probability.
        # random.choice would pick uniformly and ignore `prob`, which is only
        # correct while every branch happens to be equally likely.
        outcomes = transitions(state, res.action)
        state = random.choices(outcomes, weights=[t.prob for t in outcomes])[0].next_state

sim 0:

State(index=0, budget=10) Result(action='Y', value=107.5)
State(index=1, budget=5) Result(action='N', value=50.0)
State(index=2, budget=5) Result(action='N', value=50.0)
State(index=3, budget=5) Result(action='N', value=50.0)
State(index=4, budget=5) Result(action='Y', value=50.0)
sim 1:

State(index=0, budget=10) Result(action='Y', value=107.5)
State(index=1, budget=5) Result(action='N', value=50.0)
State(index=2, budget=5) Result(action='N', value=50.0)
State(index=3, budget=5) Result(action='N', value=50.0)
State(index=4, budget=5) Result(action='Y', value=50.0)
sim 2:

State(index=0, budget=10) Result(action='Y', value=107.5)
State(index=1, budget=9) Result(action='Y', value=85.0)
State(index=2, budget=7) Result(action='Y', value=60.0)
State(index=3, budget=4) Result(action='Y', value=30.0)
State(index=4, budget=0) Result(action='N', value=0)
sim 3:

State(index=0, budget=10) Result(action='Y', value=107.5)
State(index=1, budget=9) Result(action='Y', value=85.0)
State(index

# Fishing for Salmon Example

Let's take an example where we are fishing for salmon.

In [5]:
import random
from collections import namedtuple
# Maps each fish-level code to its row/column position in the transition matrices.
# NOTE(review): the codes presumably stand for empty / low / medium / high - confirm.
index_mapping = {
    'e': 0,
    'l': 1,
    'm': 2,
    'h': 3,
}

# The three records every model in this notebook is built from:
# State      - the fish level and its matrix index
# Transition - a successor state and the probability of reaching it
# Result     - an action and the value associated with it
State = namedtuple("State", ('fish_level', 'index'))
Transition = namedtuple('Transition', ('prob', 'next_state'))
Result = namedtuple('Result', ('action', 'value'))

# Immediate reward for choosing to fish at each fish level.
FISHING_RETURNS = {
    'e': 0,
    'l': 5,
    'm': 10,
    'h': 50,
}
def possible_actions(state: State):
    """
    List the actions available in `state`.
    'N' means don't fish and 'F' means fish. Only 'N' is available at fish
    level 'e'; both are available at every other level.
    """
    return ['N'] if state.fish_level == 'e' else ['N', 'F']

def immediate_value(state: State, action: str):
    """
    Return the immediate reward for taking `action` in `state`.

    At fish level 'e' the reward is -200 whatever the action. Otherwise not
    fishing ('N') earns 0 and fishing ('F') earns the FISHING_RETURNS entry
    for the current fish level.

    Raises ValueError for any other action, instead of silently returning None
    and failing later when the value is used in arithmetic.
    """
    if state.fish_level == 'e':
        return -200
    if action == "N":
        return 0
    if action == 'F':
        return FISHING_RETURNS[state.fish_level]
    raise ValueError(f"Unknown action {action!r}; expected 'N' or 'F'")


In [12]:
# Inverse of index_mapping: matrix position -> fish-level code.
reverse_index = {position: level for level, position in index_mapping.items()}

In [16]:
# Row i holds the probabilities of moving from fish level i to each level j
# (positions follow index_mapping) when the action is to fish.
# The last row originally read [0, 0, 0.6, 0.5], which sums to 1.1 and is not a
# valid probability distribution.
# NOTE(review): 0.6 / 0.4 is assumed here; confirm the intended split.
FISHING_TRANSITION_MATRIX = [[0,1,0,0],
                             [0.75, 0.25, 0, 0],
                             [0, 0.75, 0.25, 0],
                             [0, 0, 0.6, 0.4]
                            ]

# Same layout, for the action of not fishing.
NO_FISH_TRANSITION_MATRIX = [[0,1,0,0],
                             [0, 0.3, 0.7, 0],
                             [0, 0, 0.25, 0.75],
                             [0, 0, 0.05, 0.95]
                            ]

# Guard against typos: every row must be a probability distribution.
for _matrix in (FISHING_TRANSITION_MATRIX, NO_FISH_TRANSITION_MATRIX):
    for _row in _matrix:
        assert abs(sum(_row) - 1) < 1e-9, f"transition row {_row} does not sum to 1"


def transitions(state: State, action: str):
    """
    List the successor states of `state` under `action` with their probabilities.

    Fish level 'e' always moves to level 'l' with probability 1. Otherwise the
    row for the current level is read from NO_FISH_TRANSITION_MATRIX when the
    action is 'N' and from FISHING_TRANSITION_MATRIX for any other action;
    zero-probability entries are left out.
    """
    if state.fish_level == 'e':
        return [Transition(1, State(fish_level= 'l', index = index_mapping['l']))]

    matrix = NO_FISH_TRANSITION_MATRIX if action == 'N' else FISHING_TRANSITION_MATRIX
    return [
        # reverse_index turns the column position back into a fish-level code
        Transition(chance, State(reverse_index[column], column))
        for column, chance in enumerate(matrix[state.index])
        if chance
    ]

In [19]:
def test_state_and_actions():
    """Print the reward and transitions for every fish level and available action."""
    for level, position in index_mapping.items():
        test_state = State(level, position)
        for action in possible_actions(test_state):
            print(f"test state: {test_state}\n"
            f"action: {action}\n"
            f"value: {immediate_value(test_state,action)}\n"
            f"transitions: {transitions(test_state, action)}\n\n\n")
test_state_and_actions()

test state: State(fish_level='e', index=0)
action: N
value: -200
transitions: [Transition(prob=1, next_state=State(fish_level='l', index=1))]



test state: State(fish_level='l', index=1)
action: N
value: 0
transitions: [Transition(prob=0.3, next_state=State(fish_level='l', index=1)), Transition(prob=0.7, next_state=State(fish_level='m', index=2))]



test state: State(fish_level='l', index=1)
action: F
value: 5
transitions: [Transition(prob=0.75, next_state=State(fish_level='e', index=0)), Transition(prob=0.25, next_state=State(fish_level='l', index=1))]



test state: State(fish_level='m', index=2)
action: N
value: 0
transitions: [Transition(prob=0.25, next_state=State(fish_level='m', index=2)), Transition(prob=0.75, next_state=State(fish_level='h', index=3))]



test state: State(fish_level='m', index=2)
action: F
value: 10
transitions: [Transition(prob=0.75, next_state=State(fish_level='l', index=1)), Transition(prob=0.25, next_state=State(fish_level='m', index=2))]



test state: 