In [1]:
import pandas as pd
import numpy as np

In [2]:
class slot_machine:
    def __init__(self,win_prob,reward):
        self.win_prob = win_prob
        self.reward = reward

In [22]:
import numpy as np

class slot_machine:
    def __init__(self, win_prob, reward, jackpot_prob=0.01, jackpot_reward=1000):
        if len(win_prob) != len(reward):
            raise ValueError("win_prob and reward lists must be the same length.")
        if not np.isclose(sum(win_prob), 1.0):
            raise ValueError("Probabilities must sum to 1.")
        self.win_prob = np.array(win_prob)
        self.reward = np.array(reward)
        self.jackpot_prob = jackpot_prob
        self.jackpot_reward = jackpot_reward

    def __call__(self):
        reward = np.random.choice(self.reward, p=self.win_prob)
        if np.random.rand() < self.jackpot_prob:
            return reward + self.jackpot_reward, True
        else:
            return reward, False
        
    def calculate_expected_value(self):
        ev = sum([p * r for p, r in zip(self.win_prob, self.reward)]) + self.jackpot_prob * self.jackpot_reward
        return ev

In [53]:
balanced_machine = slot_machine(
    win_prob=[0.5, 0.3, 0.15, 0.05],
    reward=[-5, 10, 50, 100],
    jackpot_prob=0.01,
    jackpot_reward=500
)

# 2. Short-Term High Reward
short_term_high_reward = slot_machine(
    win_prob=[0.2, 0.2, 0.4, 0.2],
    reward=[-40, 20, 50, 100],
    jackpot_prob=0.005,
    jackpot_reward=200
)

# 3. Long-Term High Reward
long_term_high_reward = slot_machine(
    win_prob=[0.8, 0.1, 0.08, 0.02],
    reward=[-5, 10, 50, 100],
    jackpot_prob=0.02,
    jackpot_reward=1000
)

# 4. Low Risk, Low Reward
low_risk_low_reward = slot_machine(
    win_prob=[0.1, 0.6, 0.25, 0.05],
    reward=[-1, 10, 20, 40],
    jackpot_prob=0.001,
    jackpot_reward=100
)

# 5. High Risk, High Reward
high_risk_high_reward = slot_machine(
    win_prob=[0.85, 0.1, 0.03, 0.02],
    reward=[-30, 100, 150, 200],
    jackpot_prob=0.01,
    jackpot_reward=1000
)

machines = [balanced_machine, short_term_high_reward, long_term_high_reward, low_risk_low_reward, high_risk_high_reward]

for i, machine in enumerate(machines):
    ev = machine.calculate_expected_value()
    print(f"Machine {i+1} Expected Value: {ev:.2f}")

Machine 1 Expected Value: 18.00
Machine 2 Expected Value: 37.00
Machine 3 Expected Value: 23.00
Machine 4 Expected Value: 13.00
Machine 5 Expected Value: 3.00


In [54]:
class models:
    def __init__(self,machine):
        self.machine = machine
        self.reward = 0
        self.count = 1
        self.total_win = 0
    def reward_now(self):
        return self.reward
    def update_reward(self,outcome):
        self.reward = self.reward + (1/self.count)*(outcome-self.reward)
        self.count += 1
        self.total_win += outcome

In [70]:
machines = []

# 1. Balanced Machine
machines.append(models(balanced_machine))

# 2. Short-Term High Reward
machines.append(models(short_term_high_reward))

# 3. Long-Term High Reward
machines.append(models(long_term_high_reward))

# 4. Low Risk, Low Reward
machines.append(models(low_risk_low_reward))

# 5. High Risk, High Reward
machines.append(models(high_risk_high_reward))


In [71]:
epsilon = 0.99
episodes = 10000

total_money = 100

for i in range(episodes):
    prob = np.random.random()
    rewards = np.array([x.reward_now() for x in machines])

    if prob < epsilon:
        choice = machines[np.random.choice(range(len(machines)))]
    else:
        choice = machines[rewards.argmax()]
    
    output,jackpot = choice.machine()
    choice.update_reward(output)

In [72]:
for i in machines:
    print(i.total_win,i.count,i.reward)

33420 1970 16.973082783138658
79930 2132 37.50821210699198
41200 2011 20.497512437810915
24732 1935 12.788004136504632
1720 1957 0.8793456032719841
