In [1]:
import numpy as np
import random

# --- Environment Setup ---
# States: 0=Fast, 1=Spin, 2=Bouncer
# Actions: 0=Hit, 1=Defend, 2=Leave
n_states = 3
n_actions = 3

# Rewards matrix: rows=states, columns=actions
# Positive reward for scoring runs, negative for getting out
# Example: Fast ball, Hit=+2, Defend=0, Leave=0
rewards = np.array([
    [2, 0, 0],   # Fast ball
    [1, 0, 0],   # Spin ball
    [3, 0, -1]   # Bouncer
])

# Q-table initialization (states x actions)
Q = np.zeros((n_states, n_actions))

# --- Q-learning parameters ---
learning_rate = 0.1
discount_factor = 0.9
epsilon = 0.2  # Exploration factor
episodes = 500

# --- Training ---
for episode in range(episodes):
    state = random.randint(0, n_states-1)  # Random ball type
    done = False
    
    while not done:
        # Choose action: explore or exploit
        if random.uniform(0, 1) < epsilon:
            action = random.randint(0, n_actions-1)  # Explore
        else:
            action = np.argmax(Q[state])  # Exploit best known action

        # Get reward
        reward = rewards[state, action]

        # Next state (random ball type)
        next_state = random.randint(0, n_states-1)

        # Q-learning update rule
        Q[state, action] = Q[state, action] + learning_rate * (
            reward + discount_factor * np.max(Q[next_state]) - Q[state, action]
        )

        state = next_state
        done = True  # One-step environment for simplicity

# --- Display learned Q-table ---
print("Learned Q-table (State x Action):")
print(Q)

# --- Test the agent ---
test_states = ['Fast', 'Spin', 'Bouncer']
actions = ['Hit', 'Defend', 'Leave']

for i, state in enumerate(test_states):
    best_action = np.argmax(Q[i])
    print(f"For {state} ball, best action: {actions[best_action]}")

Learned Q-table (State x Action):
[[15.70636506  8.51643183  6.82710591]
 [14.59865659  6.95689388  5.75437927]
 [16.44941146  6.04584663  5.68759737]]
For Fast ball, best action: Hit
For Spin ball, best action: Hit
For Bouncer ball, best action: Hit
