## Simple offline MDP vs. random probabilities


The Markov Decision Process (MDP) for the slot machine is based on the following details:

**States:**

S = {Win, Lose}
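
**Actions:**

There is a single action, playing the machine once, so each step depends only on the current state.

**Transitions and rewards:**
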
- From Win:
  - With probability 0.75, receive a reward of \$2 and stay in Win.
  - With probability 0.25, receive a reward of \$0 and transition to Lose.
- From Lose:
  - With probability 0.25, receive a reward of \$0 and stay in Lose.
  - With probability 0.75, receive a reward of \$2 and transition to Win.

In [None]:
import random

# The two states of the slot-machine MDP. The simulation functions in this
# cell compare against the string labels directly rather than reading this list.
states = ["Win", "Lose"]

# Simulation
def slot_machine_simulation(num_trials=1000, win_prob=0.75, rng=None):
    """Simulate the two-state slot-machine MDP and return the total winnings.

    The machine starts in the "Win" state. On every trial:

    * from "Win": with probability ``win_prob`` pay $2 and stay in "Win",
      otherwise pay nothing and move to "Lose";
    * from "Lose": with probability ``1 - win_prob`` pay nothing and stay in
      "Lose", otherwise pay $2 and move to "Win".

    Args:
        num_trials: Number of plays to simulate. Zero or a negative value
            simulates nothing and returns 0.
        win_prob: Probability of a paying outcome on a single play; must lie
            in [0, 1]. The default 0.75 gives the fixed 0.75 / 0.25 machine.
        rng: Optional ``random.Random`` instance to draw from, so a run can
            be reproduced without reseeding the global generator. When
            omitted, the module-level ``random`` functions are used.

    Returns:
        The accumulated reward in dollars (an even, non-negative int).

    Raises:
        ValueError: If ``win_prob`` is outside [0, 1].
    """
    if not 0.0 <= win_prob <= 1.0:
        raise ValueError(f"win_prob must be in [0, 1], got {win_prob!r}")

    source = random if rng is None else rng
    stay_losing_prob = 1 - win_prob
    current_state = "Win"  # Starting state
    total_reward = 0       # Accumulate the rewards

    for _ in range(num_trials):
        # random() lies in [0.0, 1.0), so a strict "<" succeeds with exactly
        # the requested probability, including the 0 and 1 extremes.
        draw = source.random()
        if current_state == "Win":
            won = draw < win_prob
        else:
            # In "Lose" the low part of the range means "stay losing".
            won = not draw < stay_losing_prob

        if won:
            total_reward += 2
        # A paying play always lands in "Win"; a non-paying one in "Lose".
        current_state = "Win" if won else "Lose"

    return total_reward

# Simulation with random winning and losing probabilities
def random_probability_simulation(num_trials=1000):
    """Play the slot machine with randomly chosen odds and return the winnings.

    A win probability is drawn once, uniformly from [0, 1], and printed
    together with its complement. The machine starts in the "Win" state and
    is then played ``num_trials`` times:

    * in "Win", a draw at or below the win probability pays $2 and stays in
      "Win"; any other draw pays nothing and moves to "Lose";
    * in "Lose", a draw at or below the lose probability pays nothing and
      stays in "Lose"; any other draw pays $2 and moves to "Win".

    Args:
        num_trials: Number of plays to simulate.

    Returns:
        The accumulated reward in dollars.
    """
    # The odds are fixed once per run, before any play happens.
    p_win = random.uniform(0, 1)
    p_lose = 1 - p_win

    print(f"Randomly generated win probability: {p_win:.2f}")
    print(f"Randomly generated lose probability: {p_lose:.2f}")

    in_win_state = True  # the machine starts in "Win"
    winnings = 0

    for _ in range(num_trials):
        draw = random.uniform(0, 1)
        if in_win_state:
            paid = draw <= p_win
        else:
            paid = draw > p_lose

        if paid:
            winnings += 2
        # A paying play always ends in "Win", a non-paying one in "Lose".
        in_win_state = paid

    return winnings

# Run each simulation once with its default of 1000 trials. The fixed-odds
# run goes first, then the random-odds run (which prints its drawn
# probabilities); the two totals are reported afterwards, random-odds first.
total_winning1 = slot_machine_simulation()
total_winning2 = random_probability_simulation()
print(
    f"Total winnings after 1000 tries with random probabilities: ${total_winning2}",
    f"Total winnings after 1000 tries (MDP: Win 0.75, Lose 0.25): ${total_winning1}",
    sep="\n",
)


Randomly generated win probability: 0.51
Randomly generated lose probability: 0.49
Total winnings after 1000 tries with random probabilities: $1066
Total winnings after 1000 tries (MDP: Win 0.75, Lose 0.25): $1502
