In [None]:
import pygame
import numpy as np
import math
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt

In [None]:
import pygame
import math
import numpy as np


class Environment:
    """Pygame-backed 2-D field in which a car drives into a ball to kick it toward a goal.

    The environment follows a reset/step/render protocol:

    * ``reset()`` places the car and ball at random positions and returns the state.
    * ``step(action)`` advances one frame and returns ``(next_state, reward, done)``.
    * ``render()`` draws the current frame and pumps the pygame event queue.

    The state is a 9-element NumPy array:
    ``[car_x, car_y, car_velocity, car_angular_velocity, car_angle,
    ball_x, ball_y, car_ball_distance, side_flag]`` where ``side_flag`` is ``1``
    when the ball is left of the car (``ball_x < car_x``) and ``-1`` otherwise.
    """

    # Field and body dimensions, in pixels.
    FIELD_WIDTH = 800
    FIELD_HEIGHT = 500
    BALL_RADIUS = 35
    CAR_HALF_WIDTH = 50

    # Per-step car speeds (pixels and degrees respectively).
    CAR_SPEED = 5
    CAR_ANGULAR_SPEED = 5

    # Ball dynamics.
    KICK_SPEED = 7
    FRICTION = 0.98

    # Discrete action -> (drive direction, turn direction). Any action not listed
    # here means "no action" and stops the car.
    _ACTION_EFFECTS = {
        0: (1, 0),    # Forward
        1: (-1, 0),   # Backward
        2: (0, -1),   # Rotate Left
        3: (0, 1),    # Rotate Right
        4: (1, -1),   # Forward + Left
        5: (1, 1),    # Forward + Right
        6: (-1, -1),  # Backward + Left
        7: (-1, 1),   # Backward + Right
    }

    def __init__(self):
        """Initialise pygame, open the window, load/scale the sprites and reset the episode."""
        pygame.init()
        self.screen = pygame.display.set_mode((self.FIELD_WIDTH, self.FIELD_HEIGHT))  # Larger field
        pygame.display.set_caption("Reinforcement Learning: Car and Ball")
        self.clock = pygame.time.Clock()

        # Load images
        self.car_image = pygame.image.load("car.png")
        self.ball_image = pygame.image.load("ball.png")
        self.goal_image = pygame.image.load("goal.png")
        self.background_image = pygame.image.load("field.jpg")  # Background

        # Scale images
        self.car_image = pygame.transform.scale(self.car_image, (80, 100))  # Larger car
        self.ball_image = pygame.transform.scale(self.ball_image, (70, 60))  # Larger ball
        self.goal_image = pygame.transform.scale(self.goal_image, (150, 100))  # Larger goal
        self.background_image = pygame.transform.scale(
            self.background_image, (self.FIELD_WIDTH, self.FIELD_HEIGHT)
        )

        # Goal positions
        self.goal_left_x, self.goal_left_y = 100, 218  # Left-center goal
        self.goal_right_x, self.goal_right_y = 750, 218  # Right-center goal

        self.reset()

    def reset(self):
        """Start a new episode with random car/ball placement and return the initial state."""
        self.car_x, self.car_y, self.car_angle = self.randomize_car()
        self.ball_x, self.ball_y = self.randomize_ball()
        self.car_velocity = 0
        self.car_angular_velocity = 0
        self.ball_velocity_x = 0
        self.ball_velocity_y = 0
        self.kicked = False  # Track whether the ball has been kicked

        self.prev_car_ball_dist = self.get_distance(self.car_x, self.car_y, self.ball_x, self.ball_y)
        return self.get_state()

    def randomize_car(self):
        """Return a random ``(x, y, angle_in_degrees)`` for the car near the field centre."""
        x = np.random.randint(300, 500)  # Closer to the ball
        y = np.random.randint(150, 350)
        angle = np.random.uniform(0, 360)
        return x, y, angle

    def randomize_ball(self):
        """Return a random ``(x, y)`` for the ball near the field centre."""
        x = np.random.randint(250, 550)  # Closer to the car
        y = np.random.randint(150, 350)
        return x, y

    def get_distance(self, x1, y1, x2, y2):
        """Return the Euclidean distance between ``(x1, y1)`` and ``(x2, y2)``."""
        return math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

    def get_state(self):
        """Return the current 9-element observation array (see the class docstring)."""
        car_ball_dist = self.get_distance(self.car_x, self.car_y, self.ball_x, self.ball_y)
        return np.array([
            self.car_x, self.car_y, self.car_velocity, self.car_angular_velocity, self.car_angle,
            self.ball_x, self.ball_y, car_ball_dist, 1 if self.ball_x < self.car_x else -1
        ])

    def handle_collisions(self):
        """Bounce the ball off the four field edges.

        The ball is pushed back inside the field and its velocity component is
        forced to point inward. Merely negating the velocity while the ball
        overlaps a wall would flip it back toward the wall on the next frame if
        one step was not enough to clear the overlap, leaving the ball jittering
        against the edge.
        """
        radius = self.BALL_RADIUS

        # Left / right walls
        if self.ball_x - radius < 0:
            self.ball_x = radius
            self.ball_velocity_x = abs(self.ball_velocity_x)
        elif self.ball_x + radius > self.FIELD_WIDTH:
            self.ball_x = self.FIELD_WIDTH - radius
            self.ball_velocity_x = -abs(self.ball_velocity_x)

        # Top / bottom walls
        if self.ball_y - radius < 0:
            self.ball_y = radius
            self.ball_velocity_y = abs(self.ball_velocity_y)
        elif self.ball_y + radius > self.FIELD_HEIGHT:
            self.ball_y = self.FIELD_HEIGHT - radius
            self.ball_velocity_y = -abs(self.ball_velocity_y)

    def move_car(self, action):
        """Apply one discrete action to the car and advance its pose by one step.

        Args:
            action: Integer 0-7 (see ``_ACTION_EFFECTS``); any other value stops the car.

        Returns:
            ``0`` if the ball has already been kicked (the car is frozen), else ``None``.
        """
        if self.kicked:  # Stop the car after a kick
            self.car_velocity = 0
            self.car_angular_velocity = 0
            return 0

        # Both velocity components are assigned on every step, so a component the
        # action does not command is zeroed instead of carrying over from the
        # previous step (otherwise "Forward" after "Rotate Left" keeps rotating).
        drive, turn = self._ACTION_EFFECTS.get(action, (0, 0))
        self.car_velocity = drive * self.CAR_SPEED
        self.car_angular_velocity = turn * self.CAR_ANGULAR_SPEED

        self.car_angle += self.car_angular_velocity
        heading = math.radians(self.car_angle)
        self.car_x += math.cos(heading) * self.car_velocity
        self.car_y += math.sin(heading) * self.car_velocity

    def move_ball(self):
        """Advance the ball by its velocity, then damp the velocity by the friction factor."""
        self.ball_x += self.ball_velocity_x
        self.ball_y += self.ball_velocity_y
        self.ball_velocity_x *= self.FRICTION  # Friction
        self.ball_velocity_y *= self.FRICTION  # Friction

    def calculate_reward(self):
        """Return the reward for the current frame.

        Before the kick: ``-5`` while the car is more than 50 px from the ball, else ``0``.
        After the kick, based on the ball's distance to the nearer goal:
        ``500`` (<= 75 px), ``100`` (<= 150 px), ``50`` (<= 300 px), otherwise ``-10``.
        """
        # If the ball hasn't been kicked, calculate distance to the car
        if not self.kicked:
            car_ball_dist = self.get_distance(self.car_x, self.car_y, self.ball_x, self.ball_y)
            return -5 if car_ball_dist > 50 else 0  # Negative reward if far from the ball

        # After kick, calculate distance to goals
        dist_left = self.get_distance(self.ball_x, self.ball_y, self.goal_left_x, self.goal_left_y)
        dist_right = self.get_distance(self.ball_x, self.ball_y, self.goal_right_x, self.goal_right_y)
        nearest = min(dist_left, dist_right)

        if nearest <= 75:  # Ball enters a goal
            return 500
        elif nearest <= 150:  # Ball near a goal
            return 100
        elif nearest <= 300:  # Ball far from a goal
            return 50
        else:  # Ball kicked but very far
            return -10

    def check_ball_stopped(self):
        """Return True when both ball velocity components are below 0.1 in magnitude."""
        return abs(self.ball_velocity_x) < 0.1 and abs(self.ball_velocity_y) < 0.1

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: Discrete car action forwarded to ``move_car``.

        Returns:
            ``(next_state, reward, done)``; ``done`` is True once the ball has been
            kicked and has come to rest.
        """
        # Move car and ball
        self.move_car(action)
        self.move_ball()

        # Handle collisions
        self.handle_collisions()

        # Check if ball is kicked
        car_ball_dist = self.get_distance(self.car_x, self.car_y, self.ball_x, self.ball_y)
        if car_ball_dist <= self.BALL_RADIUS + self.CAR_HALF_WIDTH and not self.kicked:
            self.kicked = True
            # The ball is launched directly away from the car's centre.
            angle = math.atan2(self.ball_y - self.car_y, self.ball_x - self.car_x)
            self.ball_velocity_x = math.cos(angle) * self.KICK_SPEED
            self.ball_velocity_y = math.sin(angle) * self.KICK_SPEED

        # Calculate reward
        reward = self.calculate_reward()

        # Check if the episode is done
        done = self.kicked and self.check_ball_stopped()

        # Get the next state
        next_state = self.get_state()

        return next_state, reward, done

    def render(self):
        """Draw the current frame and cap the frame rate at 60 FPS.

        Raises:
            SystemExit: if the user closed the pygame window.
        """
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                # Raise SystemExit directly: the `quit()` helper comes from the
                # `site` module and is meant for interactive shells, not programs.
                raise SystemExit

        # Draw background
        self.screen.blit(self.background_image, (0, 0))

        # Draw goals
        left_goal_rotated = pygame.transform.rotate(self.goal_image, 90)  # Left goal rotated
        right_goal_rotated = pygame.transform.rotate(self.goal_image, -90)  # Right goal rotated
        self.screen.blit(left_goal_rotated, (self.goal_left_x - 75, self.goal_left_y - 50))
        self.screen.blit(right_goal_rotated, (self.goal_right_x - 75, self.goal_right_y - 50))

        # Draw ball
        self.screen.blit(self.ball_image, (self.ball_x - 35, self.ball_y - 30))

        # Draw car
        rotated_car = pygame.transform.rotate(self.car_image, -self.car_angle)
        car_rect = rotated_car.get_rect(center=(self.car_x, self.car_y))
        self.screen.blit(rotated_car, car_rect.topleft)

        pygame.display.flip()
        self.clock.tick(60)

In [None]:
# Deep Q-Network Agent
class DQNAgent:
    """Deep Q-Network agent with an epsilon-greedy policy and a replay buffer.

    Attributes:
        state_size: Length of the observation vector fed to the network.
        action_size: Number of discrete actions (width of the network output).
        memory: Bounded deque of ``(state, action, reward, next_state, done)`` tuples.
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Current probability of picking a random action.
        epsilon_min: Lower bound at which epsilon stops decaying.
        epsilon_decay: Multiplicative decay applied to epsilon after each replay.
        learning_rate: Learning rate passed to the Adam optimizer.
        model: The Keras Q-network.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network: two 24-unit ReLU layers and a linear output."""
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Args:
            state: Observation shaped ``(1, state_size)`` as expected by ``model.predict``.

        Returns:
            The chosen action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the Q-network on one random minibatch, then decay epsilon.

        Does nothing until the buffer holds at least ``batch_size`` transitions.
        The whole minibatch is evaluated with one ``predict`` call per input and
        trained with a single ``fit`` call, instead of issuing two predictions
        and one fit per sampled transition.

        Args:
            batch_size: Number of transitions to sample.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)

        # Stack the stored (1, state_size) rows into (batch_size, state_size) arrays.
        states = np.vstack([transition[0] for transition in minibatch])
        actions = np.array([transition[1] for transition in minibatch], dtype=int)
        rewards = np.array([transition[2] for transition in minibatch], dtype=float)
        next_states = np.vstack([transition[3] for transition in minibatch])
        dones = np.array([transition[4] for transition in minibatch], dtype=bool)

        # Bellman target: r for terminal transitions, r + gamma * max_a' Q(s', a') otherwise.
        next_q = np.asarray(self.model.predict(next_states, verbose=0))
        bootstrapped = rewards + self.gamma * np.max(next_q, axis=1)
        targets = np.where(dones, rewards, bootstrapped)

        # Only the Q-value of the action actually taken is moved toward the target;
        # np.array(...) makes a writable copy of the prediction.
        target_q = np.array(self.model.predict(states, verbose=0))
        target_q[np.arange(len(minibatch)), actions] = targets

        self.model.fit(states, target_q, batch_size=batch_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [None]:
def _plot_training_metrics(rewards, kicks, goals, avg_distances):
    """Draw the four per-episode training curves in a 2x2 grid and show the figure."""
    panels = [
        (rewards, "Rewards", "Reward", "Rewards per Episode"),
        (kicks, "Kicks", "Kicks", "Kicks per Episode"),
        (goals, "Goals", "Goals", "Goals per Episode"),
        (avg_distances, "Average Distance", "Distance", "Average Car-to-Ball Distance"),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    for ax, (series, label, ylabel, title) in zip(axes.flat, panels):
        ax.plot(series, label=label)
        ax.set_xlabel('Episode')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()

    fig.tight_layout()
    plt.show()


def train_dqn(episodes=500, render_interval=10):
    """Train a DQN agent on the car-and-ball environment and plot the training metrics.

    Each episode runs for at most 500 steps, stores every transition in the
    agent's replay buffer, and ends with one replay (training) call. The model
    is saved to ``dqn_model_episode_<N>.h5`` every 50 episodes. The pygame window
    is closed when training ends, whether it finished normally or raised.

    Args:
        episodes: Number of training episodes.
        render_interval: Render every ``render_interval``-th episode; ``0`` or
            ``None`` disables rendering.
    """
    env = Environment()
    state_size = 9  # Updated to match the size of the state array
    action_size = 8  # Number of actions
    agent = DQNAgent(state_size, action_size)
    batch_size = 32
    max_steps = 500
    # Mirrors the "ball enters a goal" distance used by Environment.calculate_reward.
    goal_radius = 75

    # Tracking metrics
    rewards = []
    kicks = []
    goals = []
    avg_distances = []

    try:
        for e in range(episodes):
            state = env.reset()
            state = np.reshape(state, [1, state_size])  # Updated to match the new state size
            total_reward = 0
            kicked = False
            goal = False
            total_distance = 0
            step_count = 0
            # Guard against render_interval == 0, which would otherwise divide by zero.
            render_episode = bool(render_interval) and e % render_interval == 0

            for _ in range(max_steps):
                action = agent.act(state)
                next_state, reward, done = env.step(action)
                next_state = np.reshape(next_state, [1, state_size])  # Updated to match the new state size

                total_distance += env.get_distance(env.car_x, env.car_y, env.ball_x, env.ball_y)
                step_count += 1

                # The environment is the single source of truth for the kick.
                if env.kicked:
                    kicked = True
                    # A goal requires the kicked ball to actually reach a goal area;
                    # the ball merely coming to rest is not a goal.
                    nearest_goal = min(
                        env.get_distance(env.ball_x, env.ball_y, env.goal_left_x, env.goal_left_y),
                        env.get_distance(env.ball_x, env.ball_y, env.goal_right_x, env.goal_right_y),
                    )
                    if nearest_goal <= goal_radius:
                        goal = True

                agent.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward

                if done:
                    break

                if render_episode:
                    env.render()

            # Replay memory
            agent.replay(batch_size)

            # Append metrics
            rewards.append(total_reward)
            kicks.append(1 if kicked else 0)  # The car freezes after a kick, so at most one per episode
            goals.append(1 if goal else 0)
            avg_distances.append(total_distance / step_count if step_count > 0 else 0)

            # Generate training log message
            log_message = f"Episode: {e + 1}/{episodes}, Reward: {total_reward:.2f}, Epsilon: {agent.epsilon:.4f}"
            if kicked:
                log_message += ", kicked"
            if goal:
                log_message += ", goal"

            print(log_message)

            # Save the model every 50 episodes
            if (e + 1) % 50 == 0:
                agent.model.save(f"dqn_model_episode_{e+1}.h5")
                print(f"Model saved at episode {e+1}")
    finally:
        # Release the pygame window even if training is interrupted.
        pygame.quit()

    # Plot training metrics
    _plot_training_metrics(rewards, kicks, goals, avg_distances)


In [None]:
# Entry point: run the full 500-episode training session when executed directly.
if __name__ == "__main__":
    train_dqn(500)

In [None]:
# Testing Trained Model
# Roll out one greedy episode with a saved checkpoint and report its total reward.
MAX_TEST_STEPS = 500  # Same per-episode cap as training; a policy that never kicks would otherwise loop forever

# compile=False: the model is only used for inference, so the training
# configuration (loss/optimizer) saved in the checkpoint is not needed.
model = tf.keras.models.load_model("dqn_model_episode_100.h5", compile=False)
env = Environment()
state_size = 9
action_size = 8
state = env.reset()
state = np.reshape(state, [1, state_size])
done = False
total_reward = 0
steps_taken = 0

try:
    while not done and steps_taken < MAX_TEST_STEPS:
        env.render()
        # verbose=0 suppresses the per-call Keras progress bar.
        action = np.argmax(model.predict(state, verbose=0)[0])
        next_state, reward, done = env.step(action)
        state = np.reshape(next_state, [1, state_size])
        total_reward += reward
        steps_taken += 1
finally:
    # Always close the pygame window, even if the rollout raises.
    pygame.quit()

print(f"Total Reward: {total_reward}")