In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Settings read as module-level globals by the solver class defined below.

# Reward collected on entering each capital level 0..100: zero everywhere
# except the last entry (capital 100).
reward_table = np.zeros(101)
reward_table[-1] = 1

probability_win = 0.55  # weight given to the "capital + bet" outcome of a bet
DISCOUNT_FACTOR = 1  # multiplier applied to the successor state's value

class StrategyUpdater:
    """Policy iteration for a betting game played on capital levels 0..GOAL.

    At capital ``s`` the player stakes ``b`` units with
    ``1 <= b <= min(s, GOAL - s)``.  The capital moves to ``s + b`` with
    probability ``probability_win`` and to ``s - b`` otherwise.  Rewards are
    read from the module-level ``reward_table`` and successor values are
    scaled by the module-level ``DISCOUNT_FACTOR``.  Levels 0 and ``GOAL`` are
    never updated, so their values keep the initial 0.

    Attributes:
        value_states: float array of length ``GOAL + 1`` holding the current
            value estimate of every capital level.
        optimal_strategy: integer array of length ``GOAL``; entry ``s`` is the
            stake currently chosen at capital ``s``.  0 means "no stake chosen
            yet"; index 0 is never written.
    """

    GOAL = 100  # highest capital level; must match len(reward_table) - 1

    def __init__(self):
        self.value_states = np.zeros(self.GOAL + 1)
        # The built-in int replaces the np.int alias, which NumPy 1.24 removed.
        self.optimal_strategy = np.zeros(self.GOAL, dtype=int)

    def calculate_value(self, position, bet, value_states):
        """Return the expected one-step return of staking ``bet`` at ``position``.

        Args:
            position: current capital level.
            bet: stake; ``position + bet`` and ``position - bet`` must be valid
                indices into ``reward_table`` and ``value_states``.
            value_states: value estimates used for the two successor levels.

        Returns:
            ``p * (R[s + b] + g * V[s + b]) + (1 - p) * (R[s - b] + g * V[s - b])``
            with ``p = probability_win`` and ``g = DISCOUNT_FACTOR``.
        """
        win_outcome = probability_win * (reward_table[position + bet] + DISCOUNT_FACTOR * value_states[position + bet])
        lose_outcome = (1 - probability_win) * (reward_table[position - bet] + DISCOUNT_FACTOR * value_states[position - bet])
        return win_outcome + lose_outcome

    def evaluate_strategy(self, tolerance=1e-14):
        """Run policy iteration, evaluating each strategy by iterative sweeps.

        Starting from the current strategy, alternates evaluation and greedy
        improvement until an improvement sweep changes no stake.

        Args:
            tolerance: an evaluation stops once the largest per-state change
                in one sweep drops below this value.  It is used for every
                evaluation round of this call.

        Returns:
            Tuple ``(value_states, optimal_strategy)`` referring to the
            instance's final arrays (not copies).
        """
        while True:
            self._evaluate_iteratively(tolerance)
            if not self._improve_once():
                break
        return self.value_states, self.optimal_strategy

    def evaluate_strategy_with_equations(self):
        """Run policy iteration, evaluating each strategy with a linear solve.

        Same alternation as ``evaluate_strategy``, but every evaluation solves
        the linear system of the current strategy directly instead of sweeping
        to convergence.

        Returns:
            Tuple ``(value_states, optimal_strategy)`` referring to the
            instance's final arrays (not copies).

        Raises:
            numpy.linalg.LinAlgError: if the system of the current strategy is
                singular.
        """
        while True:
            self._evaluate_with_equations()
            if not self._improve_once():
                break
        return self.value_states, self.optimal_strategy

    def update_strategy(self):
        """Improve the strategy greedily, re-evaluating until it is stable.

        Each round performs one full improvement sweep over all capital
        levels; if any stake changed, the new strategy is evaluated by
        iterative sweeps (default tolerance) before the next round.

        Returns:
            Tuple ``(value_states, optimal_strategy)`` referring to the
            instance's final arrays (not copies).
        """
        # A loop instead of mutual recursion keeps the stack depth constant
        # however many improvement rounds are needed.
        while self._improve_once():
            self._evaluate_iteratively()
        return self.value_states, self.optimal_strategy

    def _evaluate_iteratively(self, tolerance=1e-14):
        """Sweep the values of the current strategy until they change by < tolerance."""
        while True:
            delta = 0.0
            # Every state in a sweep is computed from the previous sweep's values.
            new_value_states = self.value_states.copy()
            for position in range(1, self.GOAL):
                bet = self.optimal_strategy[position]
                # States without a chosen stake keep their current value.
                if bet > 0:
                    new_value_states[position] = self.calculate_value(position, bet, self.value_states)
                    delta = max(delta, abs(new_value_states[position] - self.value_states[position]))
            self.value_states = new_value_states
            if delta < tolerance:
                return

    def _evaluate_with_equations(self):
        """Solve ``V = R_pi + g * P_pi V`` for the non-terminal capital levels."""
        interior = self.GOAL - 1
        # Row/column i stands for capital level i + 1.
        coefficients = np.eye(interior)
        constants = np.zeros(interior)
        for position in range(1, self.GOAL):
            row = position - 1
            bet = self.optimal_strategy[position]
            if bet <= 0:
                # No stake chosen: pin the state to its current value, matching
                # the iterative evaluation, which leaves such states untouched.
                constants[row] = self.value_states[position]
                continue
            outcomes = (
                (position + bet, probability_win),
                (position - bet, 1 - probability_win),
            )
            for next_position, chance in outcomes:
                constants[row] += chance * reward_table[next_position]
                if 0 < next_position < self.GOAL:
                    # Unknown successor value: move it to the left-hand side.
                    coefficients[row, next_position - 1] -= DISCOUNT_FACTOR * chance
                else:
                    # Terminal levels are not unknowns; use their stored value.
                    constants[row] += DISCOUNT_FACTOR * chance * self.value_states[next_position]
        new_value_states = self.value_states.copy()
        new_value_states[1:self.GOAL] = np.linalg.solve(coefficients, constants)
        self.value_states = new_value_states

    def _improve_once(self):
        """Make the strategy greedy w.r.t. ``value_states``; return True if it changed."""
        policy_changed = False
        for position in range(1, self.GOAL):
            best_bet = 0
            best_value = 0
            for bet in range(1, min(position, self.GOAL - position) + 1):
                value = self.calculate_value(position, bet, self.value_states)
                # Strict comparison: ties go to the smallest stake, and a stake
                # is only chosen when its value is positive.
                if value > best_value:
                    best_value = value
                    best_bet = bet
            if self.optimal_strategy[position] != best_bet:
                policy_changed = True
                self.optimal_strategy[position] = best_bet
        return policy_changed


In [None]:
# Run the solver from its initial (all-zero) strategy and dump the raw results.
su = StrategyUpdater()
value_estimates, final_strategy = su.evaluate_strategy()
# TODO: perform this once by solving equations too
print(value_estimates)
print(final_strategy)

# Figure 1: value estimate of each capital level 0..99 as a line.
_, value_ax = plt.subplots()
value_ax.plot(range(100), value_estimates[:100])
value_ax.set_xlabel('Capital')
value_ax.set_ylabel('Value Estimates')
value_ax.set_title('Value Estimates vs. Capital')
value_ax.grid(True)
plt.show()

# Figure 2: stake chosen at each capital level as bars, each annotated with
# the value estimate of that level rounded to two decimals.
_, policy_ax = plt.subplots()
policy_ax.bar(range(100), final_strategy, align='center', alpha=0.5)
for capital, (stake, estimate) in enumerate(zip(final_strategy, value_estimates)):
    # zip stops at the shorter array, so only levels 0..99 are labelled.
    policy_ax.text(capital - 0.75, stake + 0.01, str(round(estimate, 2)), fontsize=6)
policy_ax.set_xlabel('Capital')
policy_ax.set_xticks(np.arange(0, 101, 5))
policy_ax.set_ylabel('Final Policy (Stake)')
policy_ax.set_title('Final Policy vs. Capital')
policy_ax.grid(axis='y')
plt.show()

In [None]:
# Implementing value iteration and strategy improvement algorithms
# Settings read as module-level globals by ValueCalculator; re-declared here
# so this cell can be run without the earlier configuration cell.

# Reward collected on entering each capital level 0..100: zero everywhere
# except the last entry (capital 100).
reward_table = np.zeros(101)
reward_table[-1] = 1

probability_win = 0.55  # weight given to the "capital + bet" outcome of a bet
DISCOUNT_FACTOR = 1  # multiplier applied to the successor state's value

class ValueCalculator:
    """Value iteration for a betting game played on capital levels 0..100.

    At capital ``s`` a stake ``b`` with ``1 <= b <= min(s, 100 - s)`` moves
    the capital to ``s + b`` with probability ``probability_win`` and to
    ``s - b`` otherwise.  Rewards come from the module-level ``reward_table``
    and successor values are scaled by the module-level ``DISCOUNT_FACTOR``.
    Only levels 1..99 are ever written; levels 0 and 100 keep the initial 0.

    Attributes:
        value_states: float64 array of length 101 with the current value
            estimate of every capital level.
        optimal_strategy: float64 array of length 100; entry ``s`` holds the
            greedy stake at capital ``s`` (index 0 is never written).
    """

    def __init__(self):
        self.optimal_strategy = np.zeros(100, dtype=np.float64)
        self.value_states = np.zeros(101, dtype=np.float64)

    def calculate_value(self, position, bet, value_states):
        """Return the expected one-step return of staking ``bet`` at ``position``.

        Args:
            position: current capital level.
            bet: stake; ``position + bet`` and ``position - bet`` must be valid
                indices into ``reward_table`` and ``value_states``.
            value_states: value estimates used for the two successor levels.
        """
        higher, lower = position + bet, position - bet
        return_if_won = reward_table[higher] + DISCOUNT_FACTOR * value_states[higher]
        return_if_lost = reward_table[lower] + DISCOUNT_FACTOR * value_states[lower]
        return probability_win * return_if_won + (1 - probability_win) * return_if_lost

    def _greedy_bet(self, position):
        """Return ``(stake, value)`` of the best stake at ``position``.

        Stakes are scored against ``self.value_states``.  The comparison is
        strict, so ties go to the smallest stake; ``(0, 0)`` is returned when
        no stake has a positive value.
        """
        top_stake, top_value = 0, 0
        largest_stake = min(position, 100 - position)
        for stake in range(1, largest_stake + 1):
            candidate = self.calculate_value(position, stake, self.value_states)
            if candidate > top_value:
                top_stake, top_value = stake, candidate
        return top_stake, top_value

    def perform_value_iteration(self, tolerance=1e-50):
        """Sweep greedy value backups until converged, then extract the strategy.

        Every sweep recomputes levels 1..99 from the previous sweep's values
        and stops once the largest per-state change is below ``tolerance``.

        Args:
            tolerance: convergence threshold on the largest per-state change.
                NOTE(review): the default is far below float64 spacing near
                1.0, so it effectively asks for a sweep that changes
                nothing; confirm this is intended.

        Returns:
            Whatever ``perform_strategy_update`` returns: the tuple
            ``(value_states, optimal_strategy)``.
        """
        converged = False
        while not converged:
            # self.value_states stays the previous sweep's array until the
            # whole sweep has been written into the copy.
            refreshed = self.value_states.copy()
            largest_change = 0
            for position in range(1, 100):  # levels 0 and 100 are skipped
                _, greedy_value = self._greedy_bet(position)
                largest_change = max(largest_change, abs(self.value_states[position] - greedy_value))
                refreshed[position] = greedy_value
            self.value_states = refreshed
            converged = largest_change < tolerance
        return self.perform_strategy_update()

    def perform_strategy_update(self):
        """Store the greedy stake for levels 1..99 and return the results.

        Returns:
            Tuple ``(value_states, optimal_strategy)`` referring to the
            instance's arrays (not copies).
        """
        for position in range(1, 100):
            stake, _ = self._greedy_bet(position)
            self.optimal_strategy[position] = stake
        return self.value_states, self.optimal_strategy


In [None]:
# Run value iteration from all-zero value estimates and dump the raw results.
vc = ValueCalculator()
value_estimates, final_strategy = vc.perform_value_iteration()
print(value_estimates)
print(final_strategy)

# Figure 1: value estimate of each capital level 0..99 as a line.
_, value_ax = plt.subplots()
value_ax.plot(range(100), value_estimates[:100])
value_ax.set_xlabel('Capital')
value_ax.set_ylabel('Value Estimates')
value_ax.set_title('Value Estimates vs. Capital')
value_ax.grid(True)
plt.show()

# Figure 2: stake chosen at each capital level as bars, each annotated with
# the value estimate of that level rounded to two decimals.
_, policy_ax = plt.subplots()
policy_ax.bar(range(100), final_strategy, align='center', alpha=0.5)
for capital, (stake, estimate) in enumerate(zip(final_strategy, value_estimates)):
    # zip stops at the shorter array, so only levels 0..99 are labelled.
    policy_ax.text(capital - 0.75, stake + 0.01, str(round(estimate, 2)), fontsize=6)
policy_ax.set_xlabel('Capital')
policy_ax.set_xticks(np.arange(0, 101, 5))
policy_ax.set_ylabel('Final Policy (Stake)')
policy_ax.set_title('Final Policy vs. Capital')
policy_ax.grid(axis='y')
plt.show()
