In [None]:
!pip install pygame



# **Q-Learning Bouncing Ball Game**
This game implements a Q-learning algorithm to control paddle movement
and features multiple game mechanics including rewards and penalties.

## 1. Initialization and configuration
## 2. Game variable initialization
## 3. Q-learning parameter settings
## 4. Game object initialization functions
## 5. Q-learning related functions
## 6. Game drawing and mechanic functions
## 7. Main game loop

In [None]:
import pygame
import random
import numpy as np

pygame 2.6.1 (SDL 2.28.4, Python 3.11.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


## Initialization and configuration



In [None]:
# Pygame must be initialised before the window and fonts are created
pygame.init()

# Window geometry in pixels
SCREEN_WIDTH = 800
SCREEN_HEIGHT = 600
screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
pygame.display.set_caption("Q-Learning Bouncing Ball Game")

# RGB palette
BACKGROUND_COLOR = (255, 209, 220)  # light pink backdrop
BALL_COLOR = (255, 105, 180)        # hot pink balls
PADDLE_COLOR = (255, 182, 193)      # light pink paddle
TEXT_COLOR = (255, 20, 147)         # deep pink text
REWARD_COLOR = (255, 255, 0)        # yellow reward stars

# Typefaces: the large one is used for titles, the small one for the HUD and prompts
font = pygame.font.SysFont("Arial", 36)
small_font = pygame.font.SysFont("Arial", 24)

## Game variable initialization

In [None]:
# --- Core game state -------------------------------------------------------
clock = pygame.time.Clock()   # paces the loop and reports per-frame time
running = True                # main-loop flag
current_screen = "menu"       # "menu", "game" or "end_screen"
score = 0
balls, rewards = [], []       # ball dicts in play / reward dicts on screen
paddle = {}                   # replaced by init_paddle() when a game starts

# --- Timing ----------------------------------------------------------------
reward_timer = 0              # tick count (ms) of the last reward reshuffle
reward_move_interval = 10000  # rewards are reshuffled every 10 s (value in ms)
time_elapsed = 0              # seconds played in the current game
game_timer = 60               # length of one game in seconds

# --- Tunables --------------------------------------------------------------
INITIAL_PADDLE_WIDTH = 100

## Q-learning parameter settings

In [None]:
# Q-learning hyperparameters: learning rate, discount factor, exploration rate
alpha, gamma, epsilon = 0.1, 0.9, 0.1
Q = {}  # Q-table: state -> {action: value}

# Missed-reward penalty bookkeeping; both values are compared against
# pygame tick counts, i.e. milliseconds
last_reward_collision_timer = 0
missed_reward_penalty = 5000

## Game object initialization functions

In [None]:
def init_ball():
    """Create a fresh ball near the top of the screen.

    Returns:
        dict: ``x`` drawn uniformly from [100, SCREEN_WIDTH - 100], ``y`` fixed
        at 50, ``vx`` one of -4/-3/3/4, ``vy`` one of 3/4/5 (always positive,
        i.e. heading toward the paddle at the bottom), and ``radius`` 10.
    """
    # Draw order (x, then vx, then vy) is kept so seeded runs stay reproducible.
    spawn_x = random.randint(100, SCREEN_WIDTH - 100)
    horizontal_speed = random.choice([-4, -3, 3, 4])
    vertical_speed = random.choice([3, 4, 5])
    return dict(x=spawn_x, y=50, vx=horizontal_speed, vy=vertical_speed, radius=10)

def init_paddle():
    """Create the paddle, horizontally centred on the screen.

    The x offset is derived from INITIAL_PADDLE_WIDTH rather than a literal
    50, so the paddle stays centred if the initial width is retuned.

    Returns:
        dict: ``x`` (left edge), ``width``, ``height`` (fixed at 10) and
        ``speed`` (pixels moved per step, fixed at 5).
    """
    return {
        "x": (SCREEN_WIDTH - INITIAL_PADDLE_WIDTH) // 2,  # left edge of a centred paddle
        "width": INITIAL_PADDLE_WIDTH,                    # initial width
        "height": 10,                                     # fixed height
        "speed": 5,                                       # moving speed
    }

def init_reward():
    """Create an active reward with random position, size and type.

    Returns:
        dict: ``x`` in [100, SCREEN_WIDTH - 130], ``y`` in
        [100, SCREEN_HEIGHT // 2], ``size`` in [20, 40], ``active`` True and
        ``type`` one of "score", "paddle_extend", "extra_ball".
    """
    # Fields are filled in the same order as the random draws they consume.
    reward = {}
    reward["x"] = random.randint(100, SCREEN_WIDTH - 130)
    reward["y"] = random.randint(100, SCREEN_HEIGHT // 2)
    reward["size"] = random.randint(20, 40)
    reward["active"] = True
    reward["type"] = random.choice(["score", "paddle_extend", "extra_ball"])
    return reward

## Q-learning related functions

In [None]:
def get_state(balls, paddle):
    """Build the discrete Q-table state for the current frame.

    Args:
        balls: list of ball dicts (keys ``x``, ``y``, ``vx``, ``vy``).
        paddle: paddle dict; only ``x`` is read.

    Returns:
        None when ``balls`` is empty, otherwise the tuple
        ``(ball_x, ball_y, paddle_x, ball_direction, ball_vertical,
        other_balls_info)`` where positions are bucketed into 50-pixel cells,
        the two direction flags are +1/-1, and ``other_balls_info`` sums, over
        every other ball, +1/-1 for being right of / below the reference ball.
    """
    if not balls:
        return None

    # Balls moving down are the ones that threaten the bottom edge; if none
    # is descending, fall back to considering every ball.
    descending = [b for b in balls if b["vy"] > 0]
    candidates = descending or balls

    def frames_to_bottom(b):
        # Frames until the ball reaches the bottom edge at its current speed.
        if b["vy"] == 0:
            return float('inf')
        return (SCREEN_HEIGHT - b["y"]) / abs(b["vy"])

    # Reference ball: the one that reaches the bottom soonest
    closest_ball = min(candidates, key=frames_to_bottom)

    # Discretize continuous location information
    ball_x = int(closest_ball["x"] // 50)
    ball_y = int(closest_ball["y"] // 50)
    ball_direction = 1 if closest_ball["vx"] > 0 else -1
    ball_vertical = 1 if closest_ball["vy"] > 0 else -1
    paddle_x = int(paddle["x"] // 50)

    # Summarise where the other balls sit relative to the reference ball.
    # Identity (not ==) is used so that a different ball which happens to
    # have identical field values is still counted.
    other_balls_info = 0
    for ball in balls:
        if ball is closest_ball:
            continue
        rel_x = 1 if ball["x"] > closest_ball["x"] else -1
        rel_y = 1 if ball["y"] > closest_ball["y"] else -1
        other_balls_info += (rel_x + rel_y)

    return (ball_x, ball_y, paddle_x, ball_direction, ball_vertical, other_balls_info)

def predict_all_balls_landing(balls):
    """Predict where every descending ball will reach the bottom of the screen.

    Balls with ``vy <= 0`` are ignored. For the rest the horizontal position
    is extrapolated linearly and folded back into [0, SCREEN_WIDTH] to mimic
    bounces off the side walls.

    Args:
        balls: list of ball dicts (keys ``x``, ``y``, ``vx``, ``vy``, ``radius``).

    Returns:
        list of ``{"x": predicted_x, "time": frames_until_arrival, "ball": ball}``
        dicts, ordered by ascending arrival time.
    """
    landings = []
    for ball in balls:
        if not ball["vy"] > 0:
            continue

        frames_left = (SCREEN_HEIGHT - ball["radius"] - ball["y"]) / ball["vy"]
        landing_x = ball["x"] + (ball["vx"] * frames_left)

        # Mirror the position at each wall until it lies on screen again
        while landing_x < 0 or landing_x > SCREEN_WIDTH:
            if landing_x < 0:
                landing_x = -landing_x
            if landing_x > SCREEN_WIDTH:
                landing_x = 2 * SCREEN_WIDTH - landing_x

        landings.append({"x": landing_x, "time": frames_left, "ball": ball})

    # Earliest arrival first (stable, so ties keep their input order)
    return sorted(landings, key=lambda entry: entry["time"])

def calculate_reward(balls, paddle, action):
    """Score a paddle action against the predicted landing points.

    Each prediction (ordered by arrival time) is weighted by 1/rank. For each
    one the action earns 2 * weight for moving toward the landing point, or
    3 * weight for staying put while the point is within half a paddle width
    of the paddle centre. Independently of the action, the paddle earns up to
    5 * weight for being within one paddle width of the landing point and
    loses 3 * weight for being more than 1.5 widths away.

    Args:
        balls: list of ball dicts, forwarded to predict_all_balls_landing.
        paddle: paddle dict (keys ``x`` and ``width``).
        action: -1 (left), 0 (stay) or 1 (right).

    Returns:
        The summed reward, or 0 when no ball is predicted to land.
    """
    predictions = predict_all_balls_landing(balls)
    if not predictions:
        return 0

    width = paddle["width"]
    paddle_center = paddle["x"] + width / 2

    total = 0
    for rank, pred in enumerate(predictions, start=1):
        weight = 1.0 / rank
        landing_x = pred["x"]
        distance = abs(landing_x - paddle_center)

        # Bonus for choosing the correct direction (or correctly holding still)
        heading_to_target = (landing_x > paddle_center and action == 1) or \
                            (landing_x < paddle_center and action == -1)
        if heading_to_target:
            total += 2 * weight
        elif distance < width / 2 and action == 0:
            total += 3 * weight

        # Proximity bonus / distance penalty, regardless of the action taken
        if distance < width:
            total += (width - distance) / width * 5 * weight
        elif distance > width * 1.5:
            total -= 3 * weight

    return total

def choose_action(state, balls, paddle):
    """Pick the paddle's next move: -1 (left), 0 (stay) or 1 (right).

    With probability 0.8 a hand-written heuristic steers toward the weighted
    average of the predicted landing points. Otherwise an epsilon-greedy
    policy over the Q-table is used, restricted to moves that do not push
    the paddle through a wall.

    Greedy ties are broken uniformly at random. Always taking the first
    maximal action would make every untrained (all-zero) state move left.

    Args:
        state: hashable state key, as produced by get_state.
        balls: list of ball dicts, forwarded to predict_all_balls_landing.
        paddle: paddle dict (keys ``x`` and ``width``).

    Returns:
        int: the chosen action; 0 when no ball is predicted to land.

    Side effects:
        Adds a zero-initialised row for ``state`` to the global Q-table.
    """
    # Initialize the Q value of the state
    if state not in Q:
        Q[state] = {action: 0 for action in [-1, 0, 1]}

    predictions = predict_all_balls_landing(balls)
    if not predictions:
        return 0

    paddle_center = paddle["x"] + paddle["width"] / 2
    can_move_left = paddle["x"] > 0
    can_move_right = paddle["x"] + paddle["width"] < SCREEN_WIDTH

    # Weighted average target position: earlier arrivals count more (1/rank)
    target_x = 0
    total_weight = 0
    for i, pred in enumerate(predictions):
        weight = 1.0 / (i + 1)
        target_x += pred["x"] * weight
        total_weight += weight
    target_x /= total_weight

    # 80% probability using simple strategy
    if random.uniform(0, 1) < 0.8:
        if abs(target_x - paddle_center) < paddle["width"]/4:
            return 0
        elif target_x < paddle_center:
            return -1 if can_move_left else 0
        else:
            return 1 if can_move_right else 0

    # 20% probability of using Q-learning strategy (epsilon-greedy).
    # Both branches share the same set of wall-legal actions.
    valid_actions = [a for a in (-1, 0, 1)
                     if (a != -1 or can_move_left) and (a != 1 or can_move_right)]

    if random.uniform(0, 1) < epsilon:
        # Explore: any legal move
        return random.choice(valid_actions)

    # Exploit: best known value, with ties broken at random
    q_row = Q[state]
    best_value = max(q_row[a] for a in valid_actions)
    best_actions = [a for a in valid_actions if q_row[a] == best_value]
    return random.choice(best_actions)

def update_q_table(state, action, reward, next_state):
    """Apply one Q-learning update to the global Q-table.

    Implements ``Q[s][a] += alpha * (reward + gamma * max(Q[s']) - Q[s][a])``.
    Rows for ``state`` and ``next_state`` are created with zeros on first use.

    Args:
        state: state key in which ``action`` was taken.
        action: -1, 0 or 1.
        reward: numeric reward received for the action.
        next_state: state key observed afterwards.
    """
    # Make sure both rows exist before reading from them
    for key in (state, next_state):
        if key not in Q:
            Q[key] = {a: 0 for a in [-1, 0, 1]}

    row = Q[state]
    old_value = row[action]
    td_target = reward + gamma * max(Q[next_state].values())
    row[action] = old_value + alpha * (td_target - old_value)

## Game drawing and mechanic functions

In [None]:
def draw_star(surface, x, y, size, color):
    """Draw a filled five-pointed star centred on (x, y).

    Ten vertices are placed every 36 degrees starting straight up, alternating
    between the outer radius ``size`` and the inner radius ``size // 2``.

    Args:
        surface: pygame surface to draw on.
        x, y: centre of the star.
        size: outer radius.
        color: fill colour passed to pygame.draw.polygon.
    """
    def vertex(i):
        theta = np.radians(i * (360 / 10) - 90)
        reach = size if i % 2 == 0 else size // 2
        return (x + reach * np.cos(theta), y + reach * np.sin(theta))

    pygame.draw.polygon(surface, color, [vertex(i) for i in range(10)])

def move_rewards():
    """Re-randomise every reward in the global ``rewards`` list in place.

    Each reward dict receives a new position, size and type and is
    reactivated. The values come from init_reward() so the two functions
    cannot drift apart; the dict objects themselves are kept, so references
    held elsewhere stay valid.
    """
    for reward in rewards:
        reward.update(init_reward())

def ball_reward_collision(ball, reward):
    """Check whether a ball's centre lies inside an active reward's square.

    The square spans ``size`` pixels from the reward's ``(x, y)`` corner,
    edges included; the ball's radius is not taken into account. On a hit
    the reward is deactivated.

    Args:
        ball: ball dict (keys ``x`` and ``y``).
        reward: reward dict (keys ``x``, ``y``, ``size``, ``active``, ``type``).

    Returns:
        The reward's ``type`` string on a hit, otherwise None.
    """
    if not reward["active"]:
        return None

    left, top, side = reward["x"], reward["y"], reward["size"]
    inside_x = left <= ball["x"] <= left + side
    inside_y = top <= ball["y"] <= top + side
    if not (inside_x and inside_y):
        return None

    reward["active"] = False
    return reward["type"]

def draw_game():
    """Render one frame of the play screen onto the global ``screen``.

    Draws, in order: the background, every ball, the paddle along the bottom
    edge, each active reward as a star, and the score / remaining-time HUD.
    Reads the module-level game state and modifies none of it.
    """
    screen.fill(BACKGROUND_COLOR)

    # Balls
    for ball in balls:
        center = (int(ball["x"]), int(ball["y"]))
        pygame.draw.circle(screen, BALL_COLOR, center, ball["radius"])

    # Paddle, flush with the bottom of the window
    paddle_rect = (int(paddle["x"]), SCREEN_HEIGHT - paddle["height"],
                   paddle["width"], paddle["height"])
    pygame.draw.rect(screen, PADDLE_COLOR, paddle_rect)

    # Active rewards: a star inscribed in the reward's square
    for reward in (r for r in rewards if r["active"]):
        half = reward["size"] // 2
        draw_star(screen, reward["x"] + half, reward["y"] + half, half, REWARD_COLOR)

    # HUD: score on the left, whole seconds remaining on the right
    hud_items = (
        (f"Score: {score}", (10, 10)),
        (f"Time: {int(game_timer - time_elapsed)}", (SCREEN_WIDTH - 150, 10)),
    )
    for label, position in hud_items:
        screen.blit(small_font.render(label, True, TEXT_COLOR), position)

## Main game loop

In [None]:
# Run the menu / game / end-screen state machine, capped at 60 frames per
# second. The try/finally guarantees Pygame is shut down even when a frame
# raises, so a failed run does not leave the window open.
try:
    while running:
        # Handle exit events
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        # Menu interface
        if current_screen == "menu":
            screen.fill(BACKGROUND_COLOR)
            title = font.render("Q-Learning Bouncing Ball Game", True, TEXT_COLOR)
            start_text = small_font.render("Press ENTER to Start", True, TEXT_COLOR)
            exit_text = small_font.render("Press ESC to Exit", True, TEXT_COLOR)
            screen.blit(title, (SCREEN_WIDTH // 2 - title.get_width() // 2, 200))
            screen.blit(start_text, (SCREEN_WIDTH // 2 - start_text.get_width() // 2, 300))
            screen.blit(exit_text, (SCREEN_WIDTH // 2 - exit_text.get_width() // 2, 350))

            # Handle menu input
            keys = pygame.key.get_pressed()
            if keys[pygame.K_RETURN]:
                # Reset all per-game state before switching to the game screen
                current_screen = "game"
                score = 0
                balls = [init_ball()]
                paddle = init_paddle()
                rewards = [init_reward() for _ in range(10)]
                time_elapsed = 0
                reward_timer = pygame.time.get_ticks()
                last_reward_collision_timer = pygame.time.get_ticks()
            elif keys[pygame.K_ESCAPE]:
                running = False

        # Game interface
        elif current_screen == "game":
            # Make sure the ball is always on the court
            if not balls:
                balls = [init_ball()]

            # Update all ball positions. Iterate over a copy because balls
            # are removed from and appended to the list inside the loop.
            for ball in balls[:]:
                ball["x"] += ball["vx"]
                ball["y"] += ball["vy"]

                # Handle ball boundary collisions
                if ball["x"] <= 0 or ball["x"] >= SCREEN_WIDTH:
                    ball["vx"] = -ball["vx"]
                if ball["y"] <= 0:
                    ball["vy"] = -ball["vy"]
                if ball["y"] >= SCREEN_HEIGHT:
                    balls.remove(ball)
                    if len(balls) == 0:
                        # Losing the last ball costs points and respawns one
                        score -= 10
                        balls.append(init_ball())
                    # A lost ball is out of play: it must neither score a
                    # paddle hit nor collect rewards on its way out.
                    continue

                # Handle the collision between the ball and the paddle
                if paddle["x"] <= ball["x"] <= paddle["x"] + paddle["width"] and \
                   ball["y"] + ball["radius"] >= SCREEN_HEIGHT - paddle["height"]:
                    ball["vy"] = -abs(ball["vy"])  # always send the ball upward
                    score += 5

                # Handle collisions between balls and rewards
                for reward in rewards:
                    reward_type = ball_reward_collision(ball, reward)
                    if reward_type:
                        last_reward_collision_timer = pygame.time.get_ticks()
                        if reward_type == "score":
                            score += 25
                        elif reward_type == "paddle_extend":
                            paddle["width"] += 30
                        elif reward_type == "extra_ball":
                            balls.append(init_ball())

            # Deal with penalties for missing rewards: lose points whenever
            # missed_reward_penalty ms pass without collecting a reward
            if pygame.time.get_ticks() - last_reward_collision_timer >= missed_reward_penalty:
                score -= 10
                last_reward_collision_timer = pygame.time.get_ticks()

            # Update reward locations regularly
            if pygame.time.get_ticks() - reward_timer >= reward_move_interval:
                move_rewards()
                reward_timer = pygame.time.get_ticks()

            # Q-learning controls the paddle
            if balls:
                current_state = get_state(balls, paddle)
                if current_state:
                    action = choose_action(current_state, balls, paddle)

                    # Update the paddle position, clamped to the window
                    paddle["x"] += action * paddle["speed"]
                    paddle["x"] = max(0, min(SCREEN_WIDTH - paddle["width"], paddle["x"]))

                    # Update the Q table. The reward is evaluated against the
                    # paddle position after the move; next_state is sampled
                    # before the balls advance again.
                    reward = calculate_reward(balls, paddle, action)
                    next_state = get_state(balls, paddle)
                    if next_state:
                        update_q_table(current_state, action, reward, next_state)

            # Draw the game screen
            draw_game()

            # Update game time (clock.get_time() is in milliseconds)
            time_elapsed += clock.get_time() / 1000
            if time_elapsed >= game_timer:
                current_screen = "end_screen"

        # End interface
        elif current_screen == "end_screen":
            screen.fill(BACKGROUND_COLOR)
            end_text = font.render("Time's Up!", True, TEXT_COLOR)
            score_text = small_font.render(f"Your Score: {score}", True, TEXT_COLOR)
            menu_text = small_font.render("Press R to Return to Menu", True, TEXT_COLOR)
            exit_text = small_font.render("Press ESC to Exit", True, TEXT_COLOR)

            screen.blit(end_text, (SCREEN_WIDTH // 2 - end_text.get_width() // 2, 200))
            screen.blit(score_text, (SCREEN_WIDTH // 2 - score_text.get_width() // 2, 300))
            screen.blit(menu_text, (SCREEN_WIDTH // 2 - menu_text.get_width() // 2, 400))
            screen.blit(exit_text, (SCREEN_WIDTH // 2 - exit_text.get_width() // 2, 450))

            # Processing end interface input
            keys = pygame.key.get_pressed()
            if keys[pygame.K_r]:
                current_screen = "menu"
            elif keys[pygame.K_ESCAPE]:
                running = False

        # Update display
        pygame.display.flip()
        clock.tick(60)
finally:
    # Quit the game
    pygame.quit()